1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-04 07:57:52 +03:00

Merge branch 'master' into leader

This commit is contained in:
Guillaume Ayoub 2021-03-11 15:48:31 +01:00 committed by GitHub
commit 1882174a57
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 5164 additions and 3279 deletions

View File

@ -8,7 +8,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, macos-latest, windows-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.6, 3.7, 3.8, 3.9, pypy3]
exclude:
# Pillow wheel missing for this configuration
@ -22,9 +22,9 @@ jobs:
- uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install DejaVu (Ubuntu)
if: matrix.os == 'ubuntu-20.04'
run: sudo apt-get update -y && sudo apt-get install fonts-dejavu -y
- name: Install DejaVu and Ghostscript (Ubuntu)
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get update -y && sudo apt-get install fonts-dejavu ghostscript -y
- name: Install DejaVu, Pango, libffi and Ghostscript (MacOS)
if: matrix.os == 'macos-latest'
run: |

21
.gitignore vendored
View File

@ -1,22 +1,9 @@
# Python Bytecode
*.pyc
# Building and Distributing
/*.egg-info
/.eggs
.cache
/.coverage
/build
/dist
/docs/_build
# Various Tools
/.coverage
/coverage.xml
/htmlcov
/env
/venv
/.vagrant
/.cache
# Tests
/.pytest_cache
/pytest_cache
/tests/test_draw/results
/venv

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

View File

@ -1,24 +0,0 @@
.wy-nav-content {
height: auto;
min-height: 100%;
}
@media print {
.wy-grid-for-nav {
height: auto;
position: static;
}
}
.wy-side-nav-search, .wy-nav-side {
background-color: #14213d;
}
.wy-side-nav-search input[type=text] {
border-radius: 0;
}
.wy-menu-vertical a:hover {
background: #1ee494;
color: #14213d;
}

BIN
docs/_static/icon.ico vendored

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

14
docs/_static/logo.svg vendored
View File

@ -1,14 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="779.48822pt" height="129.40233pt" viewBox="0 0 779.48824 129.40233" version="1.1">
<path style="fill:#1ce391" d="M 23.60937,69.16797 1.08203,46.64063 C 0.35937,45.91797 0,45.19531 0,44.47266 0,43.75 0.40234,43.02736 1.20703,42.30078 l 2.76953,-2.40625 c 0.80078,-0.64062 1.48437,-0.96483 2.04687,-0.96483 0.72266,0 1.44532,0.36328 2.16798,1.08203 l 21.3203,21.20703 c 1.3672,1.36328 2.41016,2.64844 3.13673,3.85156 0.71875,1.20704 1.08202,2.57421 1.08202,4.09766 0,1.4453 -0.36327,2.76953 -1.08202,3.97655 -0.72657,1.20313 -1.76953,2.48829 -3.13673,3.85158 L 8.19141,98.32032 c -0.64454,0.64454 -1.3672,0.96484 -2.16798,0.96484 -0.72265,0 -1.40625,-0.28125 -2.04687,-0.84375 L 1.20703,95.91407 C 0.40234,95.18751 0,94.46486 0,93.7422 0,92.94141 0.35937,92.25782 1.08203,91.69532 Z m 0,0" />
<path style="fill:#f5f5f5" d="m 107.11327,99.88673 h -1.92578 c -3.3711,0 -5.74219,-1.44531 -7.10547,-4.33593 L 81.81639,63.02345 65.55467,95.5508 c -1.36718,2.89062 -3.73437,4.33593 -7.10547,4.33593 h -1.92968 c -2.01172,0 -3.09375,-1.0039 -3.25391,-3.01171 L 50.85936,42.1797 c -0.082,-0.8789 0.19531,-1.625 0.83984,-2.22656 0.64063,-0.60156 1.40625,-0.90234 2.28907,-0.90234 h 4.09765 c 0.88282,0 1.58203,0.28125 2.10938,0.84375 0.51953,0.5625 0.82031,1.32422 0.90234,2.28515 L 63.26561,80.01173 76.6367,53.26564 c 0.64454,-1.44531 1.80469,-2.16797 3.4961,-2.16797 h 3.37109 c 1.6875,0 2.85156,0.72266 3.49219,2.16797 l 13.37109,26.74609 2.16797,-37.83203 c 0.082,-0.96093 0.38281,-1.72265 0.90625,-2.28515 0.52344,-0.5625 1.22266,-0.84375 2.10938,-0.84375 h 4.09765 c 0.87891,0 1.625,0.30078 2.22657,0.90234 0.60156,0.60156 0.90234,1.34766 0.90234,2.22656 l -2.41016,54.69532 c 0,2.00781 -1.08593,3.01171 -3.2539,3.01171" />
<path style="fill:#f5f5f5" d="m 143.19921,50.85548 c -2.73047,2.57421 -4.29688,6.10546 -4.69532,10.60156 h 27.34375 c 0,-3.05078 -0.68359,-5.66016 -2.04687,-7.83203 -1.36719,-2.16797 -3.07422,-3.8125 -5.12109,-4.9375 -2.04688,-1.125 -4.15625,-1.6875 -6.32422,-1.6875 -3.3711,0 -6.42578,1.28515 -9.15625,3.85547 m 33.37109,43.49218 c 0,1.20313 -0.64453,2.04688 -1.92969,2.52735 -5.62109,2.41015 -11.64453,3.61328 -18.0664,3.61328 -8.4336,0 -15.42188,-2.54688 -20.96485,-7.64844 -5.53906,-5.09766 -8.30859,-12.98828 -8.30859,-23.67187 0,-6.50391 1.12109,-12.125 3.37109,-16.86719 2.2461,-4.73828 5.25782,-8.32813 9.03516,-10.78125 3.77344,-2.44922 7.95312,-3.67578 12.53125,-3.67578 4.17187,0 8.08984,1.0664 11.74609,3.19531 3.64844,2.12891 6.58203,5.23828 8.79297,9.33594 2.20703,4.09765 3.3125,8.95703 3.3125,14.57422 v 2.77343 c 0,2.00782 -1.00781,3.01172 -3.01172,3.01172 h -35.41797 c 0.24219,7.06641 2.1836,12.22656 5.84375,15.48047 3.65235,3.25 8.16797,4.87891 13.55079,4.87891 4.17578,0 8.87109,-0.92188 14.09375,-2.77344 0.32031,-0.15625 0.76562,-0.23828 1.32421,-0.23828 1.20704,0 2.05079,0.64453 2.53125,1.92578 l 1.20313,2.89062 c 0.24219,0.48438 0.36328,0.96485 0.36328,1.44922" />
<path style="fill:#f5f5f5" d="m 206.30078,77.35939 c -2.60936,1.60937 -3.91407,3.57422 -3.91407,5.90234 0,2.33203 1.32423,4.27735 3.97657,5.84375 2.65235,1.56641 6.46485,2.34766 11.44531,2.34766 2.24609,0 6.01954,-0.23828 11.32422,-0.72266 V 74.94923 h -11.6875 c -4.8164,0 -8.53515,0.80469 -11.14452,2.41016 m 30.17968,21.68359 c -7.94921,0.96485 -14.41796,1.44532 -19.39453,1.44532 -5.22265,0 -9.74218,-0.78125 -13.55468,-2.34766 -3.81642,-1.56641 -6.6836,-3.67578 -8.61329,-6.32422 -1.92578,-2.65234 -2.89063,-5.54297 -2.89063,-8.67187 0,-3.13282 0.96485,-6.02735 2.89063,-8.67579 1.92969,-2.65234 4.81641,-4.77734 8.67579,-6.38281 3.85546,-1.60937 8.46875,-2.41404 13.85156,-2.41404 h 11.6875 V 60.8555 c 0,-5.05859 -1.32813,-8.65234 -3.97656,-10.78125 -2.64844,-2.12892 -6.58594,-3.19531 -11.80469,-3.19531 -3.29688,0 -6.94923,0.40625 -10.96485,1.20703 -0.24218,0.082 -0.5625,0.1211 -0.96093,0.1211 -1.52735,0 -2.41016,-0.76563 -2.65235,-2.28907 l -0.35938,-3.25392 v -0.72266 c 0,-0.80078 0.1797,-1.42578 0.53907,-1.86719 0.35938,-0.4375 0.94531,-0.74218 1.74608,-0.90234 5.22267,-1.04297 10.20313,-1.56641 14.9375,-1.56641 7.79298,0 13.69532,1.84766 17.71095,5.53907 4.01562,3.69921 6.02343,9.59765 6.02343,17.71093 v 34.9336 c 0,1.92968 -0.96483,3.01562 -2.89063,3.2539" />
<path style="fill:#f5f5f5" d="m 294.33984,48.08595 c -4.57813,-0.72266 -8.19141,-1.08594 -10.84375,-1.08594 -4.01563,0 -7.39063,0.66407 -10.11719,1.98828 -2.73437,1.32422 -4.09766,3.19532 -4.09766,5.60157 0,2.08984 1.08594,3.9375 3.25391,5.54297 2.16797,1.60546 5.17969,2.64843 9.03125,3.13281 7.14844,0.88281 12.47266,2.97265 15.96484,6.26172 3.49219,3.29687 5.23829,7.35156 5.23829,12.16797 0,5.625 -2.20704,10.16015 -6.625,13.61328 -4.41797,3.45312 -10.64063,5.17968 -18.67188,5.17968 -5.37891,0 -10.80078,-0.4414 -16.26172,-1.32421 -1.6875,-0.24219 -2.53125,-1.125 -2.53125,-2.64844 0,-0.40235 0.0391,-0.72656 0.1211,-0.96485 l 0.72265,-3.13281 c 0.32031,-1.84765 1.48438,-2.64844 3.4961,-2.41015 4.65625,0.80468 9.47265,1.20703 14.45312,1.20703 4.82031,0 8.53125,-0.88282 11.14453,-2.65235 2.60938,-1.76562 3.91406,-4.05468 3.91406,-6.86718 0,-2.32813 -0.98437,-4.33594 -2.95312,-6.01954 -1.96875,-1.6914 -5.03906,-2.73046 -9.21484,-3.13671 -6.66797,-0.64063 -11.88672,-2.62891 -15.66016,-5.96094 -3.77344,-3.33203 -5.66016,-7.32813 -5.66016,-11.98828 0,-4.89844 2.04688,-8.91407 6.14453,-12.04688 4.09375,-3.13281 9.79688,-4.69922 17.10547,-4.69922 4.73828,0 9.47657,0.44532 14.21485,1.32813 1.60547,0.23828 2.41015,1.125 2.41015,2.64844 0,0.40625 -0.043,0.72265 -0.12109,0.96484 l -0.84375,3.13281 c -0.48047,1.76563 -1.6875,2.48828 -3.61328,2.16797" />
<path style="fill:#f5f5f5" d="m 370.66015,39.71095 c 0.48047,0.44531 0.72266,1.02734 0.72266,1.75 0,0.40234 -0.043,0.72265 -0.1211,0.96094 l -14.9375,59.27343 c -2.49218,9.79688 -5.78125,16.86328 -9.875,21.20313 -4.10156,4.33594 -9.64062,6.5039 -16.6289,6.5039 -4.33594,0 -9.23438,-0.96484 -14.69532,-2.89062 -1.36718,-0.48047 -2.05078,-1.36719 -2.05078,-2.65234 0,-0.16016 0.082,-0.5625 0.24219,-1.20313 l 1.08594,-3.01172 c 0.48047,-1.36719 1.32422,-2.04687 2.52734,-2.04687 0.5625,0 1.00391,0.0781 1.32422,0.23828 1.76953,0.64453 3.75781,1.14453 5.96484,1.50781 2.21094,0.35938 4.07422,0.54297 5.60157,0.54297 4.17578,0 7.55078,-1.52734 10.12109,-4.57813 2.57031,-3.05468 4.69922,-7.99218 6.38281,-14.82031 l -1.5664,0 c -3.9336,0 -7.16797,-0.92187 -9.69532,-2.76953 -2.53125,-1.84766 -4.23828,-4.53516 -5.12109,-8.07031 L 318.13671,42.42189 c -0.082,-0.23829 -0.12109,-0.5586 -0.12109,-0.96094 0,-1.60547 0.92187,-2.41016 2.76953,-2.41016 h 4.33984 c 1.84375,0 2.92969,0.84375 3.25,2.53125 l 11.6836,46.5 c 0.24219,1.125 0.76562,2.00781 1.57031,2.64844 0.80078,0.64453 1.84375,0.96484 3.12891,0.96484 l 3.73437,0 12.53125,-50.11328 c 0.48047,-1.6875 1.60547,-2.53125 3.375,-2.53125 h 4.33594 c 0.80078,0 1.44531,0.22266 1.92578,0.66016" />
<path style="fill:#f5f5f5" d="m 406.55467,91.57422 c 6.01954,0 10.88282,-1.70312 14.57422,-5.12108 3.69532,-3.41017 5.54297,-9.17188 5.54297,-17.28517 0,-7.71094 -1.70703,-13.28906 -5.12109,-16.74608 -3.41406,-3.44923 -8.01172,-5.17969 -13.79297,-5.17969 -3.05469,0 -6.14453,0.35938 -9.27734,1.08594 v 42.40233 c 3.05078,0.5625 5.74218,0.84375 8.07421,0.84375 M 389.80858,41.33985 c 5.78125,-2.32813 11.76563,-3.4961 17.94923,-3.4961 8.43358,0 15.42187,2.55077 20.96094,7.65235 5.54297,5.10156 8.3125,12.99218 8.3125,23.67187 0,10.68361 -2.76953,18.57423 -8.3125,23.67188 -5.53907,5.10156 -12.52736,7.64843 -20.96094,7.64843 -2.97267,0 -6.06642,-0.32031 -9.27734,-0.96093 v 26.86328 c 0,2.00781 -1.00391,3.01172 -3.01172,3.01172 h -4.21486 c -2.00781,0 -3.01172,-1.00391 -3.01172,-3.01172 V 43.86719 c 0,-1.20313 0.52344,-2.04688 1.56641,-2.52734" />
<path style="fill:#f5f5f5" d="m 457.96093,39.29298 c 6.42578,-0.96485 13.25391,-1.44922 20.48047,-1.44922 6.34375,0 11.34375,0.64453 15,1.92969 3.65234,1.28515 6.32422,3.33203 8.01172,6.14453 1.68359,2.8125 2.52734,6.625 2.52734,11.44531 0,2.00781 -1.0039,3.01172 -3.01172,3.01172 h -4.21484 c -2.00781,0 -3.01562,-1.00391 -3.01562,-3.01172 0,-3.77344 -1.16016,-6.44531 -3.48829,-8.01172 -2.33203,-1.56641 -6.26953,-2.35156 -11.80859,-2.35156 -3.85156,0 -8.19141,0.16406 -13.00781,0.48437 v 48.78907 c 0,2.00781 -1.00781,3.01171 -3.01172,3.01171 h -4.21875 c -2.01172,0 -3.01172,-1.0039 -3.01172,-3.01171 V 42.54298 c 0,-1.76563 0.92188,-2.84766 2.76953,-3.25" />
<path style="fill:#f5f5f5" d="m 544.73827,10.73829 v 7.95313 c 0,2.00781 -1.0039,3.01172 -3.01562,3.01172 h -7.82422 c -2.01172,0 -3.01563,-1.00391 -3.01563,-3.01172 v -7.95313 c 0,-2.0039 1.00391,-3.01172 3.01563,-3.01172 h 7.82422 c 2.01172,0 3.01562,1.00782 3.01562,3.01172 m 19.27344,82.28125 v 3.25391 c 0,2.00781 -1.00391,3.01172 -3.00781,3.01172 h -42.76953 c -2.00782,0 -3.01172,-1.00391 -3.01172,-3.01172 v -3.25391 c 0,-2.0039 1.0039,-3.01172 3.01172,-3.01172 h 16.26562 V 48.20704 h -12.28906 c -2.01172,0 -3.01172,-1.0039 -3.01172,-3.01172 v -3.13281 c 0,-2.00781 1,-3.01172 3.01172,-3.01172 h 19.51172 c 2.01172,0 3.01562,1.00391 3.01562,3.01172 v 47.94531 h 16.26563 c 2.0039,0 3.00781,1.00782 3.00781,3.01172" />
<path style="fill:#f5f5f5" d="m 582.78515,39.29296 c 6.42577,-0.96484 13.2539,-1.44921 20.47656,-1.44921 9,0 15.50391,1.86719 19.51954,5.60546 4.01562,3.73048 6.02342,9.98048 6.02342,18.73048 v 34.09375 c 0,2.00781 -1.00389,3.01172 -3.01171,3.01172 h -4.21875 c -2.00781,0 -3.01171,-1.00391 -3.01171,-3.01172 V 62.17969 c 0,-5.62111 -1.16408,-9.55469 -3.49219,-11.80469 -2.33203,-2.25 -6.26563,-3.375 -11.8086,-3.375 -3.85156,0 -8.1875,0.16406 -13.00781,0.48437 v 48.78907 c 0,2.00781 -1.0039,3.01172 -3.01173,3.01172 h -4.21483 c -2.01172,0 -3.01172,-1.00391 -3.01172,-3.01172 V 42.54297 c 0,-1.76563 0.92187,-2.84766 2.76951,-3.25" />
<path style="fill:#f5f5f5" d="m 693.54687,95.67188 c 0,1.125 -0.68358,1.92578 -2.04687,2.41015 -4.41796,1.60547 -8.63671,2.41016 -12.64843,2.41016 -6.4297,0 -11.6875,-2.05078 -15.78125,-6.14453 C 658.97266,90.25 656.92579,83.70703 656.92579,74.71094 V 48.20703 h -9.15625 c -2.00781,0 -3.01172,-1.0039 -3.01172,-3.01171 V 42.0625 c 0,-2.00781 1.00391,-3.01172 3.01172,-3.01172 h 9.15625 V 22.78907 c 0,-2.00782 1,-3.01563 3.01171,-3.01563 h 4.21485 c 2.00781,0 3.01172,1.00781 3.01172,3.01563 v 16.26171 h 19.7578 c 2.00782,0 3.01173,1.00391 3.01173,3.01172 v 3.13282 c 0,2.00781 -1.00391,3.01171 -3.01173,3.01171 h -19.7578 v 26.50391 c 0,6.10156 1.00391,10.35937 3.01172,12.76953 2.00781,2.41016 4.89844,3.61327 8.67578,3.61327 2.96875,0 5.98047,-0.55858 9.03125,-1.6875 0.3203,-0.16014 0.80468,-0.24219 1.44922,-0.24219 1.12108,0 1.96483,0.64455 2.52733,1.92969 l 1.32813,3.13281 c 0.23829,0.48048 0.35937,0.96094 0.35937,1.44532" />
<path style="fill:#1ce391" d="m 777.46874,1e-5 h -64.75 c -1.11718,0 -2.01953,0.90234 -2.01953,2.01953 v 125.02734 c 0,1.11328 0.90235,2.01953 2.01953,2.01953 h 64.75 c 1.11719,0 2.01953,-0.90625 2.01953,-2.01953 V 2.01954 c 0,-1.11719 -0.90234,-2.01953 -2.01953,-2.01953" />
</svg>

Before

Width:  |  Height:  |  Size: 11 KiB

View File

@ -1,69 +0,0 @@
API
===
API stability
-------------
Everything described here is considered “public”: this is what you can rely
on. We will try to maintain backward-compatibility, and we really often do, but
there is no hard promise.
Anything else should not be used outside of WeasyPrint itself. We reserve
the right to change it or remove it at any point. Use it at your own risk,
or pin your dependency to a specific WeasyPrint version.
Versioning
----------
Since version 43, WeasyPrint only provides major releases and does not follow
semantic versioning. This choice may look odd, but it is close to what many
browsers do, including Firefox and Chrome.
Even if each version does not break the API, each version does break the way
documents are rendered, which is what really matters at the end. Providing
minor versions would give the illusion that developers can just update
WeasyPrint without checking that everything works.
Unfortunately, we have the same problem as the other browsers: when a new
version is released, most of the user's websites are rendered exactly the same,
but a small part is not. And the only ways to know that, for web developers,
are to read the changelog and to check that their pages are correctly rendered.
More about this choice can be found in
`issue #900 <https://github.com/Kozea/WeasyPrint/issues/900>`_.
.. _command-line-api:
Command-line API
----------------
.. autofunction:: weasyprint.__main__.main(argv=sys.argv)
.. module:: weasyprint
.. _python-api:
Python API
----------
.. autoclass:: HTML(input, **kwargs)
:members:
.. autoclass:: CSS(input, **kwargs)
.. autoclass:: Attachment(input, **kwargs)
.. autofunction:: default_url_fetcher
.. module:: weasyprint.document
.. autoclass:: Document
:members:
.. autoclass:: DocumentMetadata()
:members:
.. autoclass:: Page()
:members:
.. module:: weasyprint.fonts
.. autoclass:: FontConfiguration()
.. module:: weasyprint.css.counters
.. autoclass:: CounterStyle()

View File

@ -1,27 +1,101 @@
Features
========
API Reference
=============
This page is for WeasyPrint |version|. See :doc:`changelog </changelog>`
for older versions.
.. currentmodule:: weasyprint
This page is for WeasyPrint |version|. See :doc:`changelog </changelog>` for
older versions.
API Stability
-------------
Everything described here is considered “public”: this is what you can rely
on. We will try to maintain backward-compatibility, and we really often do, but
there is no hard promise.
Anything else should not be used outside of WeasyPrint itself. We reserve
the right to change it or remove it at any point. Use it at your own risk,
or pin your dependency to a specific WeasyPrint version.
Versioning
----------
WeasyPrint provides frequent major releases, and minor releases with only bug
fixes. Versioning is close to what many browsers do, including Firefox and
Chrome: big major numbers, small minor numbers.
Even if each version does not break the API, each version does break the way
documents are rendered, which is what really matters at the end. Providing
minor versions would give the illusion that developers can just update
WeasyPrint without checking that everything works.
Unfortunately, we have the same problem as the other browsers: when a new
version is released, most of the user's websites are rendered exactly the same,
but a small part is not. And the only ways to know that, for web developers,
are to read the changelog and to check that their pages are correctly rendered.
More about this choice can be found in
issue `#900`_.
.. _#900: https://github.com/Kozea/WeasyPrint/issues/900
Command-line API
----------------
.. autofunction:: weasyprint.__main__.main(argv=sys.argv)
Python API
----------
.. autoclass:: HTML(input, **kwargs)
:members:
.. autoclass:: CSS(input, **kwargs)
.. autoclass:: Attachment(input, **kwargs)
.. autofunction:: default_url_fetcher
.. module:: weasyprint.document
.. autoclass:: Document
:members:
.. autoclass:: DocumentMetadata()
:members:
.. autoclass:: Page()
:members:
.. module:: weasyprint.text.fonts
.. autoclass:: FontConfiguration()
.. module:: weasyprint.css.counters
.. autoclass:: CounterStyle()
Supported Features
------------------
URLs
----
~~~~
WeasyPrint can read normal files, HTTP, FTP and `data URIs`_. It will follow
HTTP redirects but more advanced features like cookies and authentication
are currently not supported, although a custom :ref:`url fetcher
<url-fetchers>` can help.
are currently not supported, although a custom :ref:`URL fetcher
<URL Fetchers>` can help.
.. _data URIs: http://en.wikipedia.org/wiki/Data_URI_scheme
HTML
----
~~~~
Supported HTML Tags
+++++++++++++++++++
Many HTML elements are implemented in CSS through the HTML5
`User-Agent stylesheet
<https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css>`_.
`User-Agent stylesheet`_.
Some elements need special treatment:
@ -46,11 +120,32 @@ HTML, including font ``color`` and ``size``, list attributes like ``type`` and
generated by WeasyPrint is missing some of the features you expect from the
HTML, try to enable this option.
.. _User-Agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
.. _presentational hints: http://www.w3.org/TR/html5/rendering.html#presentational-hints
.. _Pillow: https://python-pillow.org/
Stylesheet Origins
++++++++++++++++++
HTML documents are rendered with stylesheets from three *origins*:
* The HTML5 `user agent stylesheet`_ (defines the default appearance
of HTML elements);
* Author stylesheets embedded in the document in ``<style>`` elements
or linked by ``<link rel=stylesheet>`` elements;
* User stylesheets provided in the API.
Keep in mind that *user* stylesheets have a lower priority than *author*
stylesheets in the cascade_, unless you use `!important`_ in declarations
to raise their priority.
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
.. _cascade: http://www.w3.org/TR/CSS21/cascade.html#cascading-order
.. _!important: http://www.w3.org/TR/CSS21/cascade.html#important-rules
PDF
---
~~~
In addition to text, raster and vector graphics, WeasyPrint's PDF files
can contain hyperlinks, bookmarks and attachments.
@ -64,7 +159,7 @@ point to http://weasyprint.org/news/ in PDF files.
PDF bookmarks are also called outlines and are generally shown in a
sidebar. Clicking on an entry scrolls the matching part of the document
into view. By default all ``<h1>`` to ``<h6>`` titles generate bookmarks,
but this can be controlled with CSS (see :ref:`Bookmarks <bookmarks>`.)
but this can be controlled with `PDF bookmarks`_.
Attachments are related files, embedded in the PDF itself. They can be
specified through ``<link rel=attachment>`` elements to add resources globally
@ -74,31 +169,23 @@ description of the attachment.
Fonts
-----
~~~~~
WeasyPrint can use any font that Pango can find installed on the system. Fonts
are automatically embedded in PDF files.
On Linux, Pango uses fontconfig to access fonts. You can list the available
fonts thanks to the ``fc-list`` command, and know which font is matched by a
given pattern thanks to ``fc-match``. Copying a font file into the
``~/.local/share/fonts`` or ``~/.fonts`` directory is generally enough to
install a new font. WeasyPrint should support `any font format handled by
FreeType <https://en.wikipedia.org/wiki/FreeType#File_formats>`_.
Pango always uses fontconfig to access fonts, even on Windows and macOS. You
can list the available fonts thanks to the ``fc-list`` command, and know which
font is matched by a given pattern thanks to ``fc-match``. Copying a font file
into the ``~/.local/share/fonts`` or ``~/.fonts`` directory is generally enough
to install a new font. WeasyPrint should support `any font format handled by
FreeType`_.
On Windows and macOS, **Pango >= 1.38** is required to use fontconfig and
FreeType like it does on Linux. Both, ``fc-list`` and ``fc-match`` probably
will be present, too. Installing new fonts on your system as usual should make
them available to Pango.
Otherwise (Pango < 1.38) on Windows and macOS, the native font-managing
libraries are used. You must then use the tools provided by your OS to know
which fonts are available. WeasyPrint should support any font format that's
supported by the operating system.
.. _any font format handled by FreeType: https://en.wikipedia.org/wiki/FreeType#File_formats
CSS
---
~~~
WeasyPrint supports many of the `CSS specifications`_ written by the W3C. You
will find in this chapter a comprehensive list of the specifications or drafts
@ -112,9 +199,8 @@ can be useful if you want to implement new features in WeasyPrint.
.. _test.weasyprint.org: http://test.weasyprint.org/
.. _WeasySuite: https://github.com/Kozea/WeasySuite
CSS Level 2 Revision 1
~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++
The `CSS Level 2 Revision 1`_ specification, best known as CSS 2.1, is pretty
well supported by WeasyPrint. Since version 0.11, it passes the famous `Acid2
@ -148,9 +234,8 @@ To the best of our knowledge, everything else that applies to the
print media **is** supported. Please report a bug if you find this list
incomplete.
Selectors Level 3
~~~~~~~~~~~~~~~~~
+++++++++++++++++
With the exceptions noted here, all `Selectors Level 3`_ are supported.
@ -160,9 +245,8 @@ never match anything.
.. _Selectors Level 3: http://www.w3.org/TR/css3-selectors/
CSS Text Module Level 3 / 4
~~~~~~~~~~~~~~~~~~~~~~~~~~~
+++++++++++++++++++++++++++
The `CSS Text Module Level 3`_ and `CSS Text Module Level 4`_ are working
drafts defining "properties for text manipulation" and covering "line breaking,
@ -187,8 +271,7 @@ Experimental_ properties controling hyphenation_ are supported by WeasyPrint:
To get automatic hyphenation, you need to set it to ``auto``
*and* have the ``lang`` HTML attribute set to one of the languages
`supported by Pyphen
<https://github.com/Kozea/Pyphen/tree/master/pyphen/dictionaries>`_.
`supported by Pyphen`_.
.. code-block:: html
@ -225,14 +308,13 @@ supported:
- the ``pre-wrap-auto`` value of the ``white-space`` property; and
- the ``text-spacing`` property.
.. _supported by Pyphen: https://github.com/Kozea/Pyphen/tree/master/pyphen/dictionaries
.. _hyphenation: http://www.w3.org/TR/css3-text/#hyphenation
.. _CSS Text Module Level 3: https://www.w3.org/TR/css-text-3/
.. _CSS Text Module Level 4: https://www.w3.org/TR/css-text-4/
.. _hyphenation: http://www.w3.org/TR/css3-text/#hyphenation
CSS Fonts Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++
The `CSS Fonts Module Level 3`_ is a candidate recommendation describing "how
font properties are specified and how font resources are loaded dynamically".
@ -266,9 +348,8 @@ The ``font-variant-caps`` property is supported but needs the small-caps variant
the font to be installed. WeasyPrint does **not** simulate missing small-caps
fonts.
CSS Paged Media Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++
The `CSS Paged Media Module Level 3`_ is a working draft including features for
paged media "describing how:
@ -292,9 +373,8 @@ All the features of this draft are available, including:
.. _CSS Paged Media Module Level 3: http://dev.w3.org/csswg/css3-page/
.. _#93: https://github.com/Kozea/WeasyPrint/issues/93
CSS Generated Content for Paged Media Module
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++++
The `CSS Generated Content for Paged Media Module`_ (GCPM) is a working draft
defining "new properties and values, so that authors may bring new techniques
@ -322,9 +402,8 @@ The other features of GCPM are **not** implemented:
.. _Page selectors: https://www.w3.org/TR/css-gcpm-3/#document-page-selectors
.. _running elements: https://www.w3.org/TR/css-gcpm-3/#running-elements
CSS Generated Content Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++
The `CSS Generated Content Module Level 3`_ is a working draft helping "authors
[who] sometimes want user agents to render content that does not come from the
@ -355,10 +434,7 @@ identifiers) in the current document:
a::after { content: ", see " target-text(attr(href)) }
In particular, ``target-counter()`` and ``target-text()`` are useful when it
comes to tables of contents (see `an example
<https://github.com/Kozea/WeasyPrint/pull/652#issuecomment-403276559>`_).
.. _bookmarks:
comes to tables of contents (see `an example`_).
You can also control `PDF bookmarks`_ with WeasyPrint. Using the experimental_
``bookmark-level``, ``bookmark-label`` and ``bookmark-state`` properties, you
@ -382,13 +458,13 @@ The other features of this module are **not** implemented:
.. _Quotes: https://www.w3.org/TR/css-content-3/#quotes
.. _Named strings: https://www.w3.org/TR/css-content-3/#named-strings
.. _Cross-references: https://www.w3.org/TR/css-content-3/#cross-references
.. _an example: https://github.com/Kozea/WeasyPrint/pull/652#issuecomment-403276559
.. _PDF bookmarks: https://www.w3.org/TR/css-content-3/#bookmark-generation
.. _experimental: http://www.w3.org/TR/css-2010/#experimental
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
CSS Color Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++
The `CSS Color Module Level 3`_ is a recommendation defining "CSS properties
which allow authors to specify the foreground color and opacity of an
@ -402,9 +478,8 @@ System Colors.
.. _CSS Color Module Level 3: http://www.w3.org/TR/css3-color/
CSS Transforms Module Level 1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+++++++++++++++++++++++++++++
The `CSS Transforms Module Level 1`_ working draft "describes a coordinate
system within which each element is positioned. This coordinate space can be modified
@ -422,9 +497,8 @@ transformations (``matrix3d``, ``rotate(3d|X|Y|Z)``, ``translate(3d|Z)``,
.. _CSS Transforms Module Level 1: http://dev.w3.org/csswg/css3-transforms/
CSS Backgrounds and Borders Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++
The `CSS Backgrounds and Borders Module Level 3`_ is a candidate recommendation
defining properties dealing "with the decoration of the border area and with
@ -460,9 +534,8 @@ shadows.
.. _box shadow part: http://www.w3.org/TR/css3-background/#misc
.. _git branch: https://github.com/Kozea/WeasyPrint/pull/149
CSS Image Values and Replaced Content Module Level 3 / 4
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++++++++++++++++
The `Image Values and Replaced Content Module Level 3`_ is a candidate
recommendation introducing "additional ways of representing 2D images, for
@ -492,27 +565,38 @@ The ``image-orientation`` property is **not** supported.
.. _Image Values and Replaced Content Module Level 3: http://www.w3.org/TR/css3-images/
.. _Image Values and Replaced Content Module Level 4: http://www.w3.org/TR/css4-images/
CSS Box Sizing Module Level 3
+++++++++++++++++++++++++++++
CSS Basic User Interface Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `CSS Box Sizing Module Level 3`_ is a candidate recommendation extending
"the CSS sizing properties with keywords that represent content-based
'intrinsic' sizes and context-based 'extrinsic' sizes."
The `CSS Basic User Interface Module Level 3`_ also known as CSS3 UI is a
candidate recommendation describing "CSS properties which enable authors to
style user interface related properties and values."
The new property defined in this document is implemented in WeasyPrint:
``box-sizing``.
Two new properties defined in this document are implemented in WeasyPrint:
the ``box-sizing`` and ``text-overflow`` properties.
The ``min-content``, ``max-content`` and ``fit-content()`` sizing values are
**not** supported.
Some of the properties do not apply for WeasyPrint: ``cursor``, ``resize``,
``caret-color``, ``nav-(up|right|down|left)``.
.. _CSS Box Sizing Module Level 3: https://www.w3.org/TR/css-sizing-3/
The ``outline-offset`` property is **not** implemented.
CSS Overflow Module Level 3
+++++++++++++++++++++++++++
.. _CSS Basic User Interface Module Level 3: http://www.w3.org/TR/css-ui-3/
The `CSS Overflow Module Level 3`_ is a working draft containing "the features
of CSS relating to scrollable overflow handling in visual media."
The ``overflow`` property is supported, as defined in CSS2. ``overflow-x``,
``overflow-y``, ``overflow-clip-margin``, ``overflow-inline`` and
``overflow-block`` are **not** supported.
The ``text-overflow``, ``block-ellipsis``, ``line-clamp``, ``max-lines`` and
``continue`` properties are supported.
.. _CSS Overflow Module Level 3: https://www.w3.org/TR/2020/WD-css-overflow-3-20200603/
CSS Values and Units Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+++++++++++++++++++++++++++++++++++
The `CSS Values and Units Module Level 3`_ defines various units and
keywords used in "value definition field of each CSS property".
@ -535,9 +619,8 @@ supported.
.. _CSS Values and Units Module Level 3: https://www.w3.org/TR/css3-values/
CSS Multi-column Layout Module
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++
The `CSS Multi-column Layout Module`_ "describes multi-column layouts in CSS, a
style sheet language for the web. Using functionality described in the
@ -565,9 +648,8 @@ that should be efficient with simple cases.
.. _CSS Multi-column Layout Module: https://www.w3.org/TR/css3-multicol/
CSS Fragmentation Module Level 3 / 4
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++
The `CSS Fragmentation Module Level 3`_ "describes the fragmentation model that
partitions a flow into pages, columns, or regions. It builds on the Page model
@ -592,9 +674,8 @@ The ``margin-break`` property is supported.
.. _CSS Fragmentation Module Level 3: https://www.w3.org/TR/css-break-3/
.. _CSS Fragmentation Module Level 4: https://www.w3.org/TR/css-break-4/
CSS Custom Properties for Cascading Variables Module Level 1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
The `CSS Custom Properties for Cascading Variables Module Level 1`_ "introduces
cascading variables as a new primitive value type that is accepted by all CSS
@ -602,12 +683,10 @@ properties, and custom properties for defining them."
The custom properties and the ``var()`` notation are supported.
.. _CSS Custom Properties for Cascading Variables Module Level 1:
https://www.w3.org/TR/css-variables/
.. _CSS Custom Properties for Cascading Variables Module Level 1: https://www.w3.org/TR/css-variables/
CSS Text Decoration Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++
The `CSS Text Decoration Module Level 3`_ "contains the features of CSS
relating to text decoration, such as underlines, text shadows, and emphasis
@ -621,12 +700,10 @@ supported.
The other properties (``text-underline-position``, ``text-emphasis-*``,
``text-shadow``) are not supported.
.. _CSS Text Decoration Module Level 3:
https://www.w3.org/TR/css-text-decor-3/
.. _CSS Text Decoration Module Level 3: https://www.w3.org/TR/css-text-decor-3/
CSS Flexible Box Layout Module Level 1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++
The `CSS Flexible Box Layout Module Level 1`_ "describes a CSS box model
optimized for user interface design", also known as "flexbox".

File diff suppressed because it is too large Load Diff

128
docs/common_use_cases.rst Normal file
View File

@ -0,0 +1,128 @@
Common Use Cases
================
Include in Web Applications
---------------------------
Using WeasyPrint in web applications sometimes requires attention to some
details.
Security Problems
.................
First of all, rendering untrusted HTML and CSS files can lead to :ref:`security
problems <Security>`. Please be sure to carefully follow the different proposed
solutions if you allow your users to modify the source of the rendered
documents in any way.
Rights Management
.................
Another problem is rights management: you often need to render templates that
can only be accessed by authenticated users, and WeasyPrint installed on the
server doesn't send the same cookies as the ones sent by the users. Extensions
such as Flask-WeasyPrint_ (for Flask_) or Django-WeasyPrint_ (for Django_)
solve this issue with a small amount of code. If you use another framework, you
can read these extensions and probably find an equivalent workaround.
.. _Flask-Weasyprint: https://github.com/Kozea/Flask-WeasyPrint
.. _Flask: http://flask.pocoo.org/
.. _Django-WeasyPrint: https://github.com/fdemmer/django-weasyprint
.. _Django: https://www.djangoproject.com/
Server Side Requests & Self-Signed SSL Certificates
...................................................
If your server is requesting data from itself, you may encounter a self-signed
certificate error, even if you have a valid certificate.
You need to add yourself as a Certificate Authority, so that your self-signed
SSL certificates can be requested.
.. code-block:: bash
# If you have not yet created a certificate.
sudo openssl req -x509 \
-sha256 \
-nodes \
-newkey rsa:4096 \
-days 365 \
-keyout localhost.key \
-out localhost.crt
# Follow the prompts about your certificate and the domain name.
openssl x509 -text -noout -in localhost.crt
Add your new self-signed SSL certificate to your nginx.conf, below the line
``server_name 123.123.123.123;``:
.. code-block:: bash
ssl_certificate /etc/ssl/certs/localhost.crt;
ssl_certificate_key /etc/ssl/private/localhost.key;
The SSL certificate will be valid when accessing your website from the
internet. However, images will not render when requesting files from the same
server.
You will need to add your new self-signed certificates as trusted:
.. code-block:: bash
sudo cp /etc/ssl/certs/localhost.crt /usr/local/share/ca-certificates/localhost.crt
sudo cp /etc/ssl/private/localhost.key /usr/local/share/ca-certificates/localhost.key
# Update the certificate authority trusted certificates.
sudo update-ca-certificates
# Export your newly updated Certificate Authority Bundle file.
# If using Django, it will use the newly signed certificate authority as
# valid and images will load properly.
sudo tee -a /etc/environment <<< 'export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt'
Adjust Document Dimensions
--------------------------
WeasyPrint does not provide support for adjusting page size or document margins
via command-line flags. This is best accomplished with the CSS ``@page``
at-rule. Consider the following example:
.. code-block:: css
@page {
size: Letter; /* Change from the default size of A4 */
margin: 3cm; /* Set margin on each page */
}
There is much more which can be achieved with the ``@page`` at-rule,
such as page numbers, headers, etc. Read more about the page_ at-rule.
.. _page: https://developer.mozilla.org/en-US/docs/Web/CSS/@page
Improve Rendering Speed and Memory Use
--------------------------------------
WeasyPrint is often slower than other web engines. Python is the usual suspect,
but it's not the main culprit here. :ref:`Optimization is not the main goal of
WeasyPrint <Why Python?>` and it may lead to unbearably long rendering times.
First of all: WeasyPrint's performance generally gets better with time. You can
check WeasyPerf_ to compare time and memory needed across versions.
Some tips may help you to get better results.
- A high number of CSS properties with a high number of HTML tags can lead to a
huge amount of time spent for the cascade. Avoiding large CSS frameworks can
drastically reduce the rendering time.
- Tables are known to be slow, especially when they are rendered on multiple
pages. When possible, using a common block layout instead gives much faster
layouts.
- Encoding detection can be really slow when HTML lines are really long.
Providing an explicit encoding or removing the ``chardet`` module fixes the
problem (see `#29`_).
.. _WeasyPerf: https://kozea.github.io/WeasyPerf/
.. _#29: https://github.com/chardet/chardet/issues/29

View File

@ -1,11 +1,12 @@
# WeasyPrint documentation build configuration file.
from pathlib import Path
import weasyprint
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx']
autodoc_member_order = 'bysource'
extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.intersphinx',
'sphinx.ext.autosectionlabel']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@ -18,14 +19,14 @@ master_doc = 'index'
# General information about the project.
project = 'WeasyPrint'
copyright = '2011-2020, Simon Sapin and contributors (see AUTHORS)'
copyright = 'Simon Sapin and contributors'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = (Path(__file__).parent.parent / 'weasyprint' / 'VERSION').read_text()
release = weasyprint.__version__
# The short X.Y version.
version = release
@ -35,42 +36,35 @@ version = release
exclude_patterns = ['_build']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = 'monokai'
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_rtd_theme'
html_theme_options = {
'logo_only': True,
'collapse_navigation': False,
}
html_context = {
'extra_css_files': ['_static/custom.css']
}
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = '_static/logo.svg'
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
html_favicon = '_static/icon.ico'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = []
# These paths are either relative to html_static_path
# or fully qualified paths (eg. https://...)
html_css_files = [
'https://www.courtbouillon.org/static/docs.css',
]
# Output file base name for HTML help builder.
htmlhelp_basename = 'WeasyPrintdoc'
htmlhelp_basename = 'weasyprintdoc'
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'weasyprint', 'WeasyPrint Documentation',
['Simon Sapin and contributors, see AUTHORs'], 1)
['Simon Sapin and contributors'], 1)
]
# Grouping the document tree into Texinfo files. List of tuples
@ -78,12 +72,12 @@ man_pages = [
# dir menu entry, description, category)
texinfo_documents = [(
'index', 'WeasyPrint', 'WeasyPrint Documentation',
'Simon Sapin and contributors, see AUTHORs',
'WeasyPrint', 'One line description of project.',
'Simon Sapin', 'WeasyPrint', 'The Awesome Document Factory',
'Miscellaneous'),
]
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
'python': ('http://docs.python.org/', None),
'python': ('https://docs.python.org/3/', None),
'pydyf': ('https://doc.courtbouillon.org/pydyf/stable/', None),
}

70
docs/contribute.rst Normal file
View File

@ -0,0 +1,70 @@
Contribute
==========
You want to add some code to WeasyPrint, launch its tests or improve its
documentation? Thank you very much! Here are some tips to help you play with
WeasyPrint in good conditions.
The first step is to clone the repository, create a virtual environment and
install WeasyPrint dependencies.
.. code-block:: shell
git clone https://github.com/Kozea/WeasyPrint.git
cd WeasyPrint
python -m venv venv
venv/bin/pip install .[doc,test]
You can then leave your terminal in the current directory and launch Python to
test your changes. ``import weasyprint`` will then import the working directory
code, so that you can modify it and test your changes.
.. code-block:: shell
venv/bin/python
Code & Issues
-------------
If you've found a bug in WeasyPrint, it's time to report it, and to fix it if you
can!
You can report bugs and feature requests on `GitHub`_. If you want to add or
fix some code, please fork the repository and create a pull request, we'll be
happy to review your work.
You can find more information about the code architecture in the :ref:`Dive
into the Source` section.
.. _GitHub: https://github.com/Kozea/WeasyPrint
Tests
-----
Tests are stored in the ``tests`` folder at the top of the repository. They use
the `pytest`_ library.
You can launch tests (with code coverage and lint) using the following command::
venv/bin/python -m pytest
.. _pytest: https://docs.pytest.org/
Documentation
-------------
Documentation is stored in the ``docs`` folder at the top of the repository. It
relies on the `Sphinx`_ library.
You can build the documentation using the following command::
venv/bin/sphinx-build docs docs/_build
The documentation home page can now be found in the
``/path/to/weasyprint/docs/_build/index.html`` file. You can open this file in a
browser to see the final rendering.
.. _Sphinx: https://www.sphinx-doc.org/

View File

@ -1,21 +1,220 @@
Tutorial
========
First Steps
===========
As a standalone program
-----------------------
.. currentmodule:: weasyprint
Once you have WeasyPrint :doc:`installed </install>`, you should have a
``weasyprint`` executable. Using it can be as simple as this:
Installation
------------
WeasyPrint |version| depends on:
* Python_ ≥ 3.6.0
* Pango_ ≥ 1.44.0
* CFFI_ ≥ 0.6
* html5lib_ ≥ 1.0.1
* pydyf_ ≥ 0.0.1
* tinycss2_ ≥ 1.0.0
* cssselect2_ ≥ 0.1
* Pyphen_ ≥ 0.9.1
* Pillow_ ≥ 4.0.0
* fontTools_ ≥ 4.0.0
.. _Python: http://www.python.org/
.. _Pango: http://www.pango.org/
.. _CFFI: https://cffi.readthedocs.io/
.. _html5lib: https://html5lib.readthedocs.io/
.. _pydyf: https://doc.courtbouillon.org/pydyf/
.. _tinycss2: https://doc.courtbouillon.org/tinycss2/
.. _cssselect2: https://doc.courtbouillon.org/cssselect2/
.. _Pyphen: http://pyphen.org/
.. _Pillow: https://python-pillow.org/
.. _fontTools: https://github.com/fonttools/fonttools
There are many ways to install WeasyPrint, depending on the system you use.
Linux
~~~~~
The easiest way to install WeasyPrint on Linux is to use the package manager of
your distribution. WeasyPrint is packaged for recent versions of Debian_,
Ubuntu_, Fedora_, Archlinux_, Gentoo_…
.. _Debian: https://packages.debian.org/search?keywords=weasyprint&searchon=names&suite=all&section=all
.. _Ubuntu: https://packages.ubuntu.com/search?keywords=weasyprint&searchon=names&suite=all&section=all
.. _Fedora: https://src.fedoraproject.org/rpms/weasyprint
.. _Archlinux: https://aur.archlinux.org/packages/python-weasyprint
.. _Gentoo: https://packages.gentoo.org/packages/dev-python/weasyprint
If WeasyPrint is not available on your distribution, or if you want to use a
more recent version of WeasyPrint, you have to be sure that Python_ (at least
version 3.6.0) and Pango_ (at least version 1.44.0) are installed on your
system. You can verify this by launching::
python3 --version
pango-view --version
When everything is OK, you can install WeasyPrint in a `virtual environment`_
using `pip`_::
python3 -m venv venv
source venv/bin/activate
pip install weasyprint
weasyprint --info
.. _virtual environment: https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/
.. _pip: https://pip.pypa.io/
macOS
~~~~~
The easiest way to install WeasyPrint on macOS is to use Homebrew_.
When Homebrew is installed, install Python, Pango and libffi::
brew install python pango libffi
You can then install WeasyPrint in a `virtual environment`_ using `pip`_::
python3 -m venv venv
source venv/bin/activate
pip install weasyprint
weasyprint --info
.. _Homebrew: https://brew.sh/
Windows
~~~~~~~
Installing WeasyPrint on Windows requires following a few steps that may not be
easy. Please read this chapter carefully.
Only Windows 10 64-bit is supported. You can find this information in the
Control Panel → System and Security → System.
The first step is to install the latest version of Python from the `Microsoft
Store`_.
When Python is installed, you have to install GTK. Download the latest `GTK3
installer`_ and launch it. If you don't know what some options mean, you can
safely keep the default options selected.
When everything is OK, you can launch a command prompt by clicking on the Start
menu, typing "cmd" and clicking the "Command Prompt" icon. You can then install
WeasyPrint in a `virtual environment`_ using `pip`_::
python3 -m venv venv
venv\Scripts\activate.bat
python3 -m pip install weasyprint
python3 -m weasyprint --info
.. _Microsoft Store: https://www.microsoft.com/en-us/search?q=python
.. _GTK3 installer: https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases
Other Solutions
~~~~~~~~~~~~~~~
Other solutions are available to install WeasyPrint. These solutions are not
tested but they are known to work for some use cases on specific platforms.
Macports
++++++++
On macOS, you can install WeasyPrint's dependencies with Macports_::
sudo port install py-pip pango libffi
You can then install WeasyPrint in a `virtual environment`_ using `pip`_::
python3 -m venv venv
source venv/bin/activate
pip install weasyprint
weasyprint --info
.. _Macports: https://www.macports.org/
Conda
+++++
On Linux and macOS, WeasyPrint is available on Conda_, with `a WeasyPrint
package on Conda Forge`_.
.. _Conda: https://docs.conda.io/projects/conda/en/latest/
.. _a WeasyPrint package on Conda Forge: https://anaconda.org/conda-forge/weasyprint
WSL
+++
On Windows, you can also use WSL_ and install WeasyPrint the same way it has to
be installed on Linux.
.. _WSL: https://docs.microsoft.com/en-us/windows/wsl/
.NET Wrapper
++++++++++++
On Windows, Bader Albarrak maintains `a .NET wrapper`_.
.. _a .NET wrapper: https://github.com/balbarak/WeasyPrint-netcore
AWS
+++
Kotify maintains `an AWS Lambda layer`_, see issue `#1003`_ for more
information.
.. _an AWS Lambda layer: https://github.com/kotify/cloud-print-utils
.. _#1003: https://github.com/Kozea/WeasyPrint/issues/1003
Troubleshooting
~~~~~~~~~~~~~~~
Most of the installation problems have already been met, and some `issues on
GitHub`_ could help you to solve them.
Missing Library
+++++++++++++++
On Windows, most of the problems come from unreachable libraries. If you get an
error like ``cannot load library 'xxx': error xxx``, it means that this library
is not installed or not in the ``PATH`` environment variable.
You can find more about this issue in `#589`_, `#721`_ or `#1240`_.
.. _issues on GitHub: https://github.com/Kozea/WeasyPrint/issues
.. _#589: https://github.com/Kozea/WeasyPrint/issues/589
.. _#721: https://github.com/Kozea/WeasyPrint/issues/721
.. _#1240: https://github.com/Kozea/WeasyPrint/issues/1240
Missing Fonts
+++++++++++++
If no character is drawn in the generated PDF, or if you get squares instead of
letters, you have to install fonts and make them available to WeasyPrint.
Following the standard way to install fonts on your system should be enough.
You can also use ``@font-face`` rules to explicitly reference fonts using URLs.
Command-Line
------------
Using the WeasyPrint command line interface can be as simple as this:
.. code-block:: sh
weasyprint http://weasyprint.org /tmp/weasyprint-website.pdf
You may see warnings on *stderr* about unsupported CSS properties.
See :ref:`command-line-api` for the details of all available options.
You may see warnings on the standard error output about unsupported CSS
properties. See :ref:`Command-Line API` for the details of all available
options.
In particular, the ``-s`` option can add a filename for a
:ref:`user stylesheet <stylesheet-origins>`. For quick experimentation
:ref:`user stylesheet <Stylesheet Origins>`. For quick experimentation
however, you may not want to create a file. In bash or zsh, you can
use the shell's redirection instead:
@ -28,31 +227,8 @@ If you have many documents to convert you may prefer using the Python API
in long-lived processes to avoid paying the start-up costs every time.
Adjusting Document Dimensions
.............................
Currently, WeasyPrint does not provide support for adjusting page size
or document margins via command-line flags. This is best accomplished
with the CSS ``@page`` at-rule. Consider the following example:
.. code-block:: css
@page {
size: Letter; /* Change from the default size of A4 */
margin: 2.5cm; /* Set margin on each page */
}
There is much more which can be achieved with the ``@page`` at-rule,
such as page numbers, headers, etc. Read more about the page_ at-rule,
and find an example here_.
.. _page: https://developer.mozilla.org/en-US/docs/Web/CSS/@page
.. _here: https://weasyprint.org
As a Python library
-------------------
.. currentmodule:: weasyprint
Python Library
--------------
.. attention::
@ -60,7 +236,7 @@ As a Python library
:ref:`security problems <security>`.
Quickstart
..........
~~~~~~~~~~
The Python version of the above example goes like this:
@ -77,9 +253,8 @@ The Python version of the above example goes like this:
HTML('http://weasyprint.org/').write_pdf('/tmp/weasyprint-website.pdf',
stylesheets=[CSS(string='body { font-family: serif !important }')])
Instantiating HTML and CSS objects
..................................
Instantiating HTML and CSS Objects
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you have a file name, an absolute URL or a readable :term:`file object`,
you can just pass it to :class:`HTML` or :class:`CSS` to create an instance.
@ -132,35 +307,32 @@ If you have ``@font-face`` rules in your CSS, you have to create a
'/tmp/example.pdf', stylesheets=[css],
font_config=font_config)
Rendering to a Single File
~~~~~~~~~~~~~~~~~~~~~~~~~~
Rendering to a single file
..........................
Once you have a :class:`HTML` object, call its :meth:`HTML.write_pdf` method to
get the rendered document in a single PDF file.
Once you have a :class:`HTML` object, call its :meth:`~HTML.write_pdf` method
to get the rendered document in a single PDF or PNG file.
Without arguments, this method returns a byte string in memory. If you pass a
file name or a writable :term:`file object`, they will write there directly
instead. (**Warning**: with a filename, these methods will overwrite existing
files silently.)
Without arguments, these methods return a byte string in memory. If you
pass a file name or a writable :term:`file object`, they will write there
directly instead. (**Warning**: with a filename, these methods will
overwrite existing files silently.)
Individual Pages & Meta-Data
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Individual pages, meta-data, other output formats, …
....................................................
.. currentmodule:: weasyprint.document
If you want more than a single PDF, the :meth:`~weasyprint.HTML.render` method
gives you a :class:`Document` object with access to individual :class:`Page`
objects. Thus you can get the number of pages, their size\ [#]_, the details of
hyperlinks and bookmarks, etc. Documents also have a
:meth:`~Document.write_pdf` method, and you can get a subset of the pages with
:meth:`~Document.copy()`. Finally, for ultimate control, :meth:`~Page.paint`
individual pages anywhere on any pydyf stream.
If you want more than a single PDF, the :meth:`HTML.render` method gives you a
:class:`document.Document` object with access to individual
:class:`document.Page` objects. Thus you can get the number of pages, their
size\ [#]_, the details of hyperlinks and bookmarks, etc. Documents also have a
:meth:`document.Document.write_pdf` method, and you can get a subset of the
pages with :meth:`document.Document.copy()`. Finally, for ultimate control,
:meth:`document.Page.paint` individual pages anywhere on any
:class:`pydyf.Stream`.
.. [#] Pages in the same document do not always have the same size.
See the :ref:`python-api` for details. A few random examples:
See the :ref:`Python API` for details. A few random examples:
.. code-block:: python
@ -189,18 +361,15 @@ See the :ref:`python-api` for details. A few random examples:
print_outline(bookmark.children, indent + 2)
print_outline(document.make_bookmark_tree())
.. _url-fetchers:
URL fetchers
............
URL Fetchers
~~~~~~~~~~~~
WeasyPrint goes through a *URL fetcher* to fetch external resources such as
images or CSS stylesheets. The default fetcher can natively open file and
HTTP URLs, but the HTTP client does not support advanced features like cookies
or authentication. This can be worked-around by passing a custom
``url_fetcher`` callable to the :class:`HTML` or :class:`CSS` classes.
It must have the same signature as :func:`~weasyprint.default_url_fetcher`.
It must have the same signature as :func:`default_url_fetcher`.
Custom fetchers can choose to handle some URLs and defer others
to the default fetcher:
@ -240,14 +409,13 @@ A custom fetcher should be returning a :obj:`dict` with
If a ``file_obj`` is given, the resource will be closed automatically by
the function internally used by WeasyPrint to retrieve data.
.. _Flask-WeasyPrint: http://packages.python.org/Flask-WeasyPrint/
.. _Flask-Weasyprint: https://github.com/Kozea/Flask-WeasyPrint
.. _Flask: http://flask.pocoo.org/
.. _Django-WeasyPrint: https://pypi.org/project/django-weasyprint/
.. _Django-WeasyPrint: https://github.com/fdemmer/django-weasyprint
.. _Django: https://www.djangoproject.com/
Logging
.......
~~~~~~~
Most errors (unsupported CSS property, missing image, ...)
are not fatal and will not prevent a document from being rendered.
@ -271,8 +439,6 @@ when used as a library.
See the documentation of the :mod:`logging` module for details.
.. _security:
Security
--------
@ -283,10 +449,8 @@ high memory use, endless renderings or local files leaks.
*This section has been added thanks to the very useful reports and advice from
Raz Becker.*
.. _long-renderings:
Long renderings
...............
Long Renderings
~~~~~~~~~~~~~~~
WeasyPrint is pretty slow and can take a long time to render long documents or
specially crafted HTML pages.
@ -308,12 +472,12 @@ untrusted users, you should:
- truncate and sanitize HTML and CSS input to avoid very long documents and
access to external URLs.
Infinite requests
.................
Infinite Requests
~~~~~~~~~~~~~~~~~
WeasyPrint can reach files on the network, for example using ``http://``
URIs. For various reasons, HTTP requests may take a long time and lead to
problems similar to :ref:`long-renderings`.
problems similar to :ref:`Long Renderings`.
WeasyPrint has a default timeout of 10 seconds for HTTP, HTTPS and FTP
resources. This timeout has no effect with other protocols, including access to
@ -322,11 +486,11 @@ resources. This timeout has no effect with other protocols, including access to
If you use WeasyPrint on a server with HTML or CSS samples coming from
untrusted users, or need to reach network resources, you should:
- use a custom `URL fetcher <url-fetchers>`_,
- follow solutions listed in :ref:`long-renderings`.
- use a custom :ref:`URL fetcher <URL Fetchers>`,
- follow solutions listed in :ref:`Long Renderings`.
Infinite loops
..............
Infinite Loops
~~~~~~~~~~~~~~
WeasyPrint has been hit by a large number of bugs, including infinite
loops. Specially crafted HTML and CSS files can quite easily lead to infinite
@ -335,10 +499,10 @@ loops and infinite rendering times.
If you use WeasyPrint on a server with HTML or CSS samples coming from
untrusted users, you should:
- follow solutions listed in :ref:`long-renderings`.
- follow solutions listed in :ref:`Long Renderings`.
Huge values
...........
Huge Values
~~~~~~~~~~~
WeasyPrint doesn't restrict integer and float values used in CSS. Using huge
values for some properties (page sizes, font sizes, block sizes) can lead to
@ -352,10 +516,10 @@ pre-processors using relative units (``em`` and ``%`` for example).
If you use WeasyPrint on a server with HTML or CSS samples coming from
untrusted users, you should:
- follow solutions listed in :ref:`long-renderings`.
- follow solutions listed in :ref:`Long Renderings`.
Access to local files
.....................
Access to Local Files
~~~~~~~~~~~~~~~~~~~~~
As any web renderer, WeasyPrint can reach files on the local filesystem using
``file://`` URIs. These files can be shown in ``img`` or ``embed`` tags for
@ -373,12 +537,12 @@ If you use WeasyPrint on a server with HTML or CSS samples coming from
untrusted users, you should:
- restrict your process access to trusted files using sandboxing solutions,
- use a custom `URL fetcher <url-fetchers>`_ that doesn't allow ``file://``
- use a custom :ref:`URL fetcher <URL Fetchers>` that doesn't allow ``file://``
URLs or filters access depending on given paths.
- follow solutions listed in :ref:`long-renderings`.
- follow solutions listed in :ref:`Long Renderings`.
System information leaks
........................
System Information Leaks
~~~~~~~~~~~~~~~~~~~~~~~~
WeasyPrint relies on many libraries that can leak hardware and software
information. Even when this information looks useless, it can be used by
@ -393,8 +557,8 @@ Leaks can include (but are not restricted to):
- Python, Pango and other libraries versions (implementation details
lead to different renderings).
SVG images
..........
SVG Images
~~~~~~~~~~
Rendering SVG images more or less suffers from the same problems as the ones
listed here for WeasyPrint.
@ -403,34 +567,3 @@ Security advices apply for untrusted SVG files as they apply for untrusted HTML
and CSS documents.
Note that WeasyPrint's URL fetcher is used to render SVG files.
Errors
------
If you get an exception during rendering, it is probably a bug in WeasyPrint.
Please copy the full traceback and report it on our `issue tracker`_.
.. _issue tracker: https://github.com/Kozea/WeasyPrint/issues
.. _stylesheet-origins:
Stylesheet origins
------------------
HTML documents are rendered with stylesheets from three *origins*:
* The HTML5 `user agent stylesheet`_ (defines the default appearance
of HTML elements);
* Author stylesheets embedded in the document in ``<style>`` elements
or linked by ``<link rel=stylesheet>`` elements;
* User stylesheets provided in the API.
Keep in mind that *user* stylesheets have a lower priority than *author*
stylesheets in the cascade_, unless you use `!important`_ in declarations
to raise their priority.
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
.. _cascade: http://www.w3.org/TR/CSS21/cascade.html#cascading-order
.. _!important: http://www.w3.org/TR/CSS21/cascade.html#important-rules

267
docs/going_further.rst Normal file
View File

@ -0,0 +1,267 @@
Going Further
=============
.. currentmodule:: weasyprint
Why WeasyPrint?
---------------
Automatic document generation is a common need of many applications, even if a
lot of operations do not require printed paper anymore.
Invoices, tickets, leaflets, diplomas, documentation, books… All these
documents are read and used on paper, but also on electronic readers, on
smartphones, on computers. PDF is a great format to store and display them in
a reliable way, with pagination.
Using HTML and CSS to generate static and paged content can be strange at first
glance: browsers display only one page, with variable dimensions, often in a
very dynamic way. But paged media layout is actually included in CSS2_, which
was already a W3C recommendation in 1998.
Other well-known tools can be used to automatically generate PDF documents,
like LaTeX and LibreOffice, but they miss many advantages that HTML and CSS
offer. HTML and CSS are very widely known, by developers but also by
webdesigners. They are specified in a backwards-compatible way, and regularly
adapted to please the use of billions of people. They are really easy to write
and generate, with a ridiculous amount of tools that are finely adapted to the
needs and taste of their users.
However, the web engines that are used for browsers were very limited for
pagination when WeasyPrint was created in 2011. Even now, they lack a lot of
basic features. That's why projects such as wkhtmltopdf_ and PagedJS_ have been
created: they add some of these features to existing browsers.
Other solutions have been developed, including web engines dedicated to paged
media. Prince_, Antennahouse_ or `Typeset.sh`_ created original renderers
supporting many features related to pagination. These tools are very powerful,
but they are not open source.
Building a free and open source web renderer generating high-quality documents
is the main goal of WeasyPrint. Do you think that it was a little bit crazy to
create such a big project from scratch? Here is what `Simon Sapin`_ wrote
in WeasyPrint's documentation one month after the beginning:
Are we crazy? Yes. But not that much. Each modern web browser did take many
developers many years of work to get where they are now, but WeasyPrint's
scope is much smaller: there is no user-interaction, no JavaScript, no live
rendering (the document doesn't change after it was first parsed) and no
quirks mode (we don't need to support every broken page of the web.)
We still need however to implement the whole CSS box model and visual
rendering. This is a lot of work, but we feel we can get something useful
much quicker than “Let's build a rendering engine!” may seem.
Simon is often right.
.. _CSS2: https://www.w3.org/TR/1998/REC-CSS2-19980512/
.. _wkhtmltopdf: https://wkhtmltopdf.org/
.. _PagedJS: https://www.pagedjs.org/
.. _Prince: https://www.princexml.com/
.. _Antennahouse: https://www.antennahouse.com/
.. _Typeset.sh: https://typeset.sh/
.. _Simon Sapin: https://exyr.org/
Why Python?
-----------
Python is a really good language to design a small, OS-agnostic parser. As it
is object-oriented, it gives the possibility to follow the specification with
high-level classes and a small amount of very simple code.
Speed is not WeasyPrint's main goal. Web rendering is a very complex task, and
following :pep:`the Zen of Python <20>` helped a lot to keep our sanity (both in our
code and in our heads): code simplicity, maintainability and flexibility are
the most important goals for this library, as they give the ability to stay
really close to the specification and to fix bugs easily.
Dive into the Source
--------------------
This chapter is a high-level overview of WeasyPrint's source code. For more
details, see the various docstrings or even the code itself. When in doubt,
feel free to :ref:`ask <Support>`!
Much `like in web browsers`_, the rendering of a document in WeasyPrint goes
like this:
1. The HTML document is fetched and parsed into a tree of elements (like DOM).
2. CSS stylesheets (either found in the HTML or supplied by the user) are
fetched and parsed.
3. The stylesheets are applied to the DOM-like tree.
4. The DOM-like tree with styles is transformed into a *formatting structure*
made of rectangular boxes.
5. These boxes are *laid-out* with fixed dimensions and position onto pages.
6. For each page, the boxes are re-ordered to observe stacking rules, and are
drawn on a PDF page.
7. Metadata such as document information, attachments, embedded files,
hyperlinks, and PDF trim and bleed boxes are added to the PDF.
.. _like in web browsers: http://www.html5rocks.com/en/tutorials/internals/howbrowserswork/#The_main_flow
Parsing HTML
............
Not much to see here. The :class:`HTML` class handles step 1 and
gives a tree of HTML *elements*. Although the actual API is different, this
tree is conceptually the same as what web browsers call *the DOM*.
Parsing CSS
...........
As with HTML, CSS stylesheets are parsed in the :class:`CSS` class
with an external library, tinycss2_.
In addition to the actual parsing, the ``css`` and ``css.validation``
modules do some pre-processing:
* Unknown and unsupported declarations are ignored with warnings.
Remaining property values are parsed in a property-specific way
from raw tinycss2 tokens into a higher-level form.
* Shorthand properties are expanded. For example, ``margin`` becomes
``margin-top``, ``margin-right``, ``margin-bottom`` and ``margin-left``.
* Hyphens in property names are replaced by underscores (``margin-top`` becomes
``margin_top``). This transformation is safe since none of the known (not
ignored) properties have an underscore character.
* Selectors are pre-compiled with cssselect2_.
.. _tinycss2: https://pypi.python.org/pypi/tinycss2
.. _cssselect2: https://pypi.python.org/pypi/cssselect2
The Cascade
...........
After that and still in the ``css`` package, the cascade_
(that's the C in CSS!) applies the stylesheets to the element tree.
Selectors associate property declarations to elements. In case of conflicting
declarations (different values for the same property on the same element),
the one with the highest *weight* wins. Weights are based on the stylesheet's
:ref:`origin <Stylesheet Origins>`, ``!important`` markers, selector
specificity and source order. Missing values are filled in through
*inheritance* (from the parent element) or the property's *initial value*,
so that every element has a *specified value* for every property.
.. _cascade: http://www.w3.org/TR/CSS21/cascade.html
These *specified values* are turned into *computed values* in the
``css.computed_values`` module. Keywords and lengths in various units are
converted to pixels, etc. At this point the value for some properties can be
represented by a single number or string, but some require more complex
objects. For example, a ``Dimension`` object can be either an absolute length
or a percentage.
The final result of the ``css.get_all_computed_styles`` function is a big dict
where keys are ``(element, pseudo_element_type)`` tuples, and values are style
dict objects. Elements are ElementTree elements, while the type of
pseudo-element is a string for e.g. ``::first-line`` selectors, or :obj:`None`
for “normal” elements. Style dict objects are dicts mapping property names to
the computed values. (The return value is not the dict itself, but a
convenience ``style_for`` function for accessing it.)
Formatting Structure
....................
The `visual formatting model`_ explains how *elements* (from the ElementTree
tree) generate *boxes* (in the formatting structure). This is step 4 above.
Boxes may have children and thus form a tree, much like elements. This tree is
generally close but not identical to the ElementTree tree: some elements
generate more than one box or none.
.. _visual formatting model: http://www.w3.org/TR/CSS21/visuren.html
Boxes are of a lot of different kinds. For example you should not confuse
*block-level boxes* and *block containers*, though *block boxes* are both. The
``formatting_structure.boxes`` module has a whole hierarchy of classes to
represent all these boxes. We won't go into the details here, see the module
and class docstrings.
The ``formatting_structure.build`` module takes an ElementTree tree with
associated computed styles, and builds a formatting structure. It generates the
right boxes for each element and ensures they conform to the model's rules
(e.g. an inline box cannot contain a block). Each box has a ``style``
attribute containing the style dict of computed values.
The main logic is based on the ``display`` property, but it can be overridden
for some elements by adding a handler in the ``html`` module.
This is how ``<img>`` and ``<td colspan=3>`` are currently implemented,
for example.
This module is rather short as most of HTML is defined in CSS rather than
in Python, in the `user agent stylesheet`_.
The ``formatting_structure.build.build_formatting_structure`` function returns
the box for the root element (and, through its ``children`` attribute, the
whole tree).
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
Layout
......
Step 5 is the layout. You could say that everything else is glue code and
this is where the magic happens.
During the layout the document's content is, well, laid out on pages.
This is when we decide where to do line breaks and page breaks. If a break
happens inside of a box, that box is split into two (or more) boxes in the
layout result.
According to the `box model`_, each box has rectangular margin, border,
padding and content areas:
.. _box model: http://www.w3.org/TR/CSS21/box.html
.. image:: https://www.w3.org/TR/CSS21/images/boxdim.png
:alt: CSS Box Model
While ``box.style`` contains computed values, the `used values`_ are set as
attributes of the ``Box`` object itself during the layout. This includes
resolving percentages and especially ``auto`` values into absolute, pixel
lengths. Once the layout is done, each box has used values for margins, border
width, padding of each of the four sides, as well as the ``width`` and ``height`` of
the content area. They also have ``position_x`` and ``position_y``, the
absolute coordinates of the top-left corner of the margin box (**not** the
content box) from the top-left corner of the page.\ [#]_
Boxes also have helper methods such as ``content_box_y`` and ``margin_width``
that give other metrics that can be useful in various parts of the code.
The final result of the layout is a list of ``PageBox`` objects.
.. [#] These are the coordinates *if* no `CSS transform`_ applies.
Transforms change the actual location of boxes, but they are applied
later during drawing and do not affect layout.
.. _used values: http://www.w3.org/TR/CSS21/cascade.html#used-value
.. _CSS transform: http://www.w3.org/TR/css3-transforms/
Stacking & Drawing
..................
In step 6, the boxes are reordered by the ``stacking`` module to observe
`stacking rules`_ such as the ``z-index`` property. The result is a tree of
*stacking contexts*.
Next, each laid-out page is *drawn* onto a PDF page. Since each box has
absolute coordinates on the page from the layout step, the logic here should be
minimal. If you find yourself adding a lot of logic here, maybe it should go in
the layout or stacking instead.
The code lives in the ``draw`` module.
.. _stacking rules: http://www.w3.org/TR/CSS21/zindex.html
Metadata
........
Finally (step 7), the ``pdf`` module adds metadata to the PDF file: document
information, attachments, hyperlinks, embedded files, trim box and bleed box.

View File

@ -1,235 +0,0 @@
Hacking WeasyPrint
==================
Assuming you already have the :doc:`dependencies </install>`,
install the `development version`_ of WeasyPrint:
.. _development version: https://github.com/Kozea/WeasyPrint
.. code-block:: sh
git clone https://github.com/Kozea/WeasyPrint.git
cd WeasyPrint
python3 -m venv env
. env/bin/activate
pip install -e .[doc,test]
weasyprint --help
This will install WeasyPrint in “editable” mode (which means that you don't
need to re-install it every time you make a change in the source code) as well
as `pytest <http://pytest.org/>`_ and `Sphinx <http://www.sphinx-doc.org/>`_.
Lastly, in order to pass unit tests, your system must have as default font any
font with a condensed variant (e.g. DejaVu) - typically installable via your
distro's packaging system.
Documentation changes
---------------------
The documentation lives in the ``docs`` directory,
but the API section references docstrings in the source code.
Run ``python setup.py build_sphinx`` to rebuild the documentation
and get the output in ``docs/_build/html``.
The website version is updated automatically when we push to master on GitHub.
Code changes
------------
Use the ``python setup.py test`` command from the ``WeasyPrint`` directory to
run the test suite.
Please report any bugs/feature requests and submit patches/pull requests
`on GitHub <https://github.com/Kozea/WeasyPrint>`_.
Dive into the source
--------------------
The rest of this document is a high-level overview of WeasyPrint's source
code. For more details, see the various docstrings or even the code itself.
When in doubt, feel free to `ask <http://weasyprint.org/about/>`_!
Much like `in web browsers
<http://www.html5rocks.com/en/tutorials/internals/howbrowserswork/#The_main_flow>`_,
the rendering of a document in WeasyPrint goes like this:
1. The HTML document is fetched and parsed into a tree of elements (like DOM).
2. CSS stylesheets (either found in the HTML or supplied by the user) are
fetched and parsed.
3. The stylesheets are applied to the DOM-like tree.
4. The DOM-like tree with styles is transformed into a *formatting structure*
made of rectangular boxes.
5. These boxes are *laid-out* with fixed dimensions and position onto pages.
6. For each page, the boxes are re-ordered to observe stacking rules, and are
drawn on a PDF page.
7. Metadata such as attachments, embedded files, and PDF trim and bleed boxes
are added to the PDF.
HTML
....
Not much to see here. The :class:`weasyprint.HTML` class handles step 1 and
gives a tree of HTML *elements*. Although the actual API is different, this
tree is conceptually the same as what web browsers call *the DOM*.
CSS
...
As with HTML, CSS stylesheets are parsed in the :class:`weasyprint.CSS` class
with an external library, tinycss2_.
In addition to the actual parsing, the :mod:`weasyprint.css` and
:mod:`weasyprint.css.validation` modules do some pre-processing:
* Unknown and unsupported declarations are ignored with warnings.
Remaining property values are parsed in a property-specific way
from raw tinycss2 tokens into a higher-level form.
* Shorthand properties are expanded. For example, ``margin`` becomes
``margin-top``, ``margin-right``, ``margin-bottom`` and ``margin-left``.
* Hyphens in property names are replaced by underscores (``margin-top`` becomes
``margin_top``). This transformation is safe since none of the known (not
ignored) properties have an underscore character.
* Selectors are pre-compiled with cssselect2_.
.. _tinycss2: https://pypi.python.org/pypi/tinycss2
.. _cssselect2: https://pypi.python.org/pypi/cssselect2
The cascade
...........
After that and still in the :mod:`weasyprint.css` package, the cascade_
(that's the C in CSS!) applies the stylesheets to the element tree.
Selectors associate property declarations to elements. In case of conflicting
declarations (different values for the same property on the same element),
the one with the highest *weight* wins. Weights are based on the stylesheet's
:ref:`origin <stylesheet-origins>`, ``!important`` markers, selector
specificity and source order. Missing values are filled in through
*inheritance* (from the parent element) or the property's *initial value*,
so that every element has a *specified value* for every property.
.. _cascade: http://www.w3.org/TR/CSS21/cascade.html
These *specified values* are turned into *computed values* in the
``weasyprint.css.computed_values`` module. Keywords and lengths in various
units are converted to pixels, etc. At this point the value for some
properties can be represented by a single number or string, but some require
more complex objects. For example, a :class:`Dimension` object can be either
an absolute length or a percentage.
The final result of the :func:`~weasyprint.css.get_all_computed_styles`
function is a big dict where keys are ``(element, pseudo_element_type)``
tuples, and values are style dict objects. Elements are ElementTree elements,
while the type of pseudo-element is a string for e.g. ``::first-line``
selectors, or :obj:`None` for “normal” elements. Style dict objects are dicts
mapping property names to the computed values. (The return value is not the
dict itself, but a convenience :func:`style_for` function for accessing it.)
Formatting structure
....................
The `visual formatting model`_ explains how *elements* (from the ElementTree
tree) generate *boxes* (in the formatting structure). This is step 4 above.
Boxes may have children and thus form a tree, much like elements. This tree is
generally close but not identical to the ElementTree tree: some elements
generate more than one box or none.
.. _visual formatting model: http://www.w3.org/TR/CSS21/visuren.html
Boxes are of a lot of different kinds. For example you should not confuse
*block-level boxes* and *block containers*, though *block boxes* are both.
The :mod:`weasyprint.formatting_structure.boxes` module has a whole hierarchy
of classes to represent all these boxes. We won't go into the details here,
see the module and class docstrings.
The :mod:`weasyprint.formatting_structure.build` module takes an ElementTree
tree with associated computed styles, and builds a formatting structure. It
generates the right boxes for each element and ensures they conform to the
model's rules (e.g. an inline box cannot contain a block). Each box has a
:attr:`.style` attribute containing the style dict of computed values.
The main logic is based on the ``display`` property, but it can be overridden
for some elements by adding a handler in the ``weasyprint.html`` module.
This is how ``<img>`` and ``<td colspan=3>`` are currently implemented,
for example.
This module is rather short as most of HTML is defined in CSS rather than
in Python, in the `user agent stylesheet`_.
The :func:`~weasyprint.formatting_structure.build.build_formatting_structure`
function returns the box for the root element (and, through its
:attr:`children` attribute, the whole tree).
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
Layout
......
Step 5 is the layout. You could say that everything else is glue code and
this is where the magic happens.
During the layout the document's content is, well, laid out on pages.
This is when we decide where to do line breaks and page breaks. If a break
happens inside of a box, that box is split into two (or more) boxes in the
layout result.
According to the `box model`_, each box has rectangular margin, border,
padding and content areas:
.. _box model: http://www.w3.org/TR/CSS21/box.html
.. image:: _static/box_model.png
:align: center
While :obj:`box.style` contains computed values, the `used values`_ are set
as attributes of the :class:`Box` object itself during the layout. This
includes resolving percentages and especially ``auto`` values into absolute,
pixel lengths. Once the layout is done, each box has used values for
margins, border width, padding of each of the four sides, as well as the
:attr:`width` and :attr:`height` of the content area. They also have
:attr:`position_x` and :attr:`position_y`, the absolute coordinates of the
top-left corner of the margin box (**not** the content box) from the top-left
corner of the page.\ [#]_
Boxes also have helper methods such as :meth:`content_box_y` and
:meth:`margin_width` that give other metrics that can be useful in various
parts of the code.
The final result of the layout is a list of :class:`PageBox` objects.
.. [#] These are the coordinates *if* no `CSS transform`_ applies.
Transforms change the actual location of boxes, but they are applied
later during drawing and do not affect layout.
.. _used values: http://www.w3.org/TR/CSS21/cascade.html#used-value
.. _CSS transform: http://www.w3.org/TR/css3-transforms/
Stacking & Drawing
..................
In step 6, the boxes are reordered by the :mod:`weasyprint.stacking` module
to observe `stacking rules`_ such as the ``z-index`` property.
The result is a tree of *stacking contexts*.
Next, each laid-out page is *drawn* onto a PDF page. Since each box has
absolute coordinates on the page from the layout step, the logic here should be
minimal. If you find yourself adding a lot of logic here, maybe it should go in
the layout or stacking instead.
The code lives in the :mod:`weasyprint.draw` module.
.. _stacking rules: http://www.w3.org/TR/CSS21/zindex.html
Metadata
........
Finally (step 7), the :mod:`weasyprint.pdf` module parses (if needed) the PDF
file produced and adds metadata: attachments, embedded files, trim box and
bleed box.

View File

@ -1,17 +1,23 @@
WeasyPrint
==========
.. currentmodule:: weasyprint
.. include:: ../README.rst
Documentation contents
----------------------
.. toctree::
:caption: Documentation
:maxdepth: 3
first_steps
common_use_cases
api_reference
going_further
.. toctree::
:maxdepth: 2
:caption: Extra Information
:maxdepth: 3
install
tutorial
tips-tricks
api
features
hacking
changelog
.. include:: ../AUTHORS
changelog
contribute
support

View File

@ -1,437 +0,0 @@
Installing
==========
WeasyPrint |version| depends on:
* CPython_ ≥ 3.6.0
* Pango_ ≥ 1.44.0
* CFFI_ ≥ 0.6
* html5lib_ ≥ 0.999999999
* pydyf_ ≥ 0.0.1
* tinycss2_ ≥ 1.0.0
* cssselect2_ ≥ 0.1
* Pyphen_ ≥ 0.9.1
* Pillow_ ≥ 4.0.0
.. _CPython: http://www.python.org/
.. _Pango: http://www.pango.org/
.. _CFFI: https://cffi.readthedocs.io/
.. _html5lib: https://html5lib.readthedocs.io/
.. _pydyf: https://doc.courtbouillon.org/pydyf/
.. _tinycss2: https://doc.courtbouillon.org/tinycss2/
.. _cssselect2: https://doc.courtbouillon.org/cssselect2/
.. _Pyphen: http://pyphen.org/
.. _Pillow: https://python-pillow.org/
Python and Pango need to be installed separately. See
platform-specific instructions for :ref:`Linux <linux>`, :ref:`macOS <macos>`
and :ref:`Windows <windows>` below.
Install WeasyPrint with pip_. This will automatically install most of the
dependencies. You probably need either a virtual environment (venv,
recommended) or to use ``sudo``.
.. _pip: http://pip-installer.org/
.. code-block:: sh
python3 -m venv ./venv
. ./venv/bin/activate
pip install WeasyPrint
Now let's try it:
.. code-block:: sh
weasyprint --help
weasyprint http://weasyprint.org ./weasyprint-website.pdf
You should see warnings about unsupported CSS 3 stuff; this is expected.
In the PDF you should see the WeasyPrint logo on the first page.
If everything goes well, you're ready to :doc:`start using </tutorial>`
WeasyPrint! Otherwise, please copy the full error message and
`report the problem <https://github.com/Kozea/WeasyPrint/issues/>`_.
.. _linux:
Linux
-----
Pango cannot be installed with pip and needs to be installed from your
platform's packages. CFFI can, but you'd still need its own dependencies.
This section lists system packages for CFFI when available, the dependencies
otherwise. CFFI needs *libffi* with development files. On Debian, the package
is called ``libffi-dev``.
If your favorite system is not listed here but you know the package names,
`tell us <http://weasyprint.org/about/>`_ so we can add it here.
Debian / Ubuntu
~~~~~~~~~~~~~~~
WeasyPrint is `packaged for Debian 11 or newer
<https://packages.debian.org/search?searchon=names&keywords=weasyprint>`_.
You can install it with pip on Debian 11 Bullseye or newer, or on Ubuntu 20.04
Focal Fossa or newer, after installing the following packages:
.. code-block:: sh
sudo apt-get install python3-pip python3-cffi libpango-1.0-0
Fedora
~~~~~~
WeasyPrint is `packaged for Fedora
<http://rpms.remirepo.net/rpmphp/zoom.php?rpm=weasyprint>`_, but you can
install it with pip after installing the following packages:
.. code-block:: sh
sudo yum install python-pip python-cffi pango
Archlinux
~~~~~~~~~
WeasyPrint is `available in the AUR
<https://aur.archlinux.org/packages/python-weasyprint/>`_, but you can install
it with pip after installing the following packages:
.. code-block:: sh
sudo pacman -S python-pip python-cffi pango
Gentoo
~~~~~~
WeasyPrint is `packaged in Gentoo
<https://packages.gentoo.org/packages/dev-python/weasyprint>`_, but you can
install it with pip after installing the following packages:
.. code-block:: sh
emerge pip python-cffi pango
Alpine
~~~~~~
For Alpine Linux 3.11 or newer:
.. code-block:: sh
apk --update --upgrade add gcc musl-dev jpeg-dev zlib-dev libffi-dev pango-dev
.. _macos:
macOS
-----
WeasyPrint is automatically installed and tested on virtual macOS machines. The
official installation method relies on Homebrew. Install Homebrew if you haven't already:
.. code-block:: sh
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
Install Python, Pango and libffi using Homebrew:
.. code-block:: sh
brew install python pango libffi
If you get the `Fontconfig error: Cannot load default config file` message,
then try reinstalling fontconfig:
.. code-block:: sh
brew uninstall fontconfig
brew install fontconfig
You can also try with Macports, but please notice that this solution is not
tested and thus not recommended (**also known as "you're on your own and may
end up crying blood with sad dolphins for eternity"**):
.. code-block:: sh
sudo port install py-pip pango libffi
.. _windows:
Windows
-------
Dear Windows user, please follow these steps carefully.
Really carefully. Don't cheat.
Besides a proper Python installation and a few Python packages, WeasyPrint
needs the Pango library. It is required for text rendering. This library isn't
a Python package. It is part of `GTK+ <https://en.wikipedia.org/wiki/GTK+>`_
(formerly known as GIMP Toolkit), and must be installed separately.
The following installation instructions for the GTK+ libraries don't work on
Windows XP. That means: Windows Vista or later is required.
Of course you can decide to install ancient WeasyPrint versions with
erstwhile Python versions, combine them with outdated GTK+ libraries on any
Windows version you like, but if you decide to do that **you're on your own,
don't even try to report an issue, kittens will die because of you.**
Step 1 - Install Python
~~~~~~~~~~~~~~~~~~~~~~~
Install the `latest Python 3.x <https://www.python.org/downloads/windows/>`_
- On Windows 32 bit download the "Windows **x86** executable installer"
- On Windows 64 bit download the "Windows **x86-64** executable installer"
Follow the `instructions <https://docs.python.org/3/using/windows.html>`_.
You may customize your installation as you like, but we suggest that you
"Add Python 3.x to PATH" for convenience and let the installer "install pip".
Step 2 - Update pip and setuptools packages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Python is bundled with modules that may have been updated since the release.
Please open a *Command Prompt* and execute the following command:
.. code-block:: console
python -m pip install --upgrade pip setuptools
Step 3 - Install WeasyPrint
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the console window execute the following command to install the WeasyPrint
package:
.. code-block:: console
python -m pip install WeasyPrint
Step 4 - Install the GTK+ libraries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There's one thing you **must** observe:
- If your Python is 32 bit you must use the 32 bit versions of those libraries.
- If your Python is 64 bit you must use the 64 bit versions of those libraries.
If you mismatch the bitness, the warning about kittens dying applies.
In case you forgot which Python architecture you installed, you can find out by
running the following command in command prompt:
.. code-block:: console
python --version --version
If your python architecture is 64 bit you can either use the :ref:`GTK+ 64 Bit
Installer <gtk64installer>` or install the 64-bit :ref:`GTK+ via MSYS2
<msys2_gtk>`.
If your python architecture is 32 bit you'll have to install the 32-bit :ref:`GTK+ via MSYS2
<msys2_gtk>`.
.. note::
Installing those libraries doesn't mean something extraordinary. It only
means that the files must be on your computer and WeasyPrint must be able
to find them, which is achieved by putting the path-to-the-libs into your
Windows ``PATH``.
.. _msys2_gtk:
Install GTK+ with the aid of MSYS2
""""""""""""""""""""""""""""""""""
Sadly the `GTK+ Runtime for 32 bit Windows
<https://gtk-win.sourceforge.io/home/index.php/Main/Home>`_ was discontinued in
April 2017. Since then developers are advised to either bundle GTK+ with their
software (which is beyond the capacities of the WeasyPrint maintainers) or
install it through the `MSYS2 project <https://msys2.github.io/>`_.
With the help of MSYS2, both the 32 bit as well as the 64 bit GTK+ can be
installed. If you installed the 64 bit Python and don't want to bother with
MSYS2, then go ahead and use the :ref:`GTK+ 64 Bit Installer <gtk64installer>`.
MSYS2 is a development environment. We (somehow) mis-use it to only supply the
up-to-date GTK+ runtime library files in a subfolder we can inject into our
``PATH``. But maybe you get interested in the full powers of MSYS2. It's the
perfect tool for experimenting with `MinGW
<https://en.wikipedia.org/wiki/MinGW>`_ and cross-platform development -- look
at its `wiki <https://github.com/msys2/msys2/wiki>`_.
Ok, let's install GTK3+.
* Download and run the `MSYS2 installer <http://www.msys2.org/>`_
- On 32 bit Windows: "msys2-**i686**-xxxxxxxx.exe"
- On 64 bit Windows: "msys2-**x86_64**-xxxxxxxx.exe"
You alternatively may download a zipped archive, unpack it and run
``msys2_shell.cmd`` as described in the `MSYS2 wiki
<https://github.com/msys2/msys2/wiki/MSYS2-installation>`_.
* Update the MSYS2 shell with
.. code-block:: console
pacman -Syuu
Close the shell by clicking the close button in the upper right corner of the window.
* Restart the MSYS2 shell. Repeat the command
.. code-block:: console
pacman -Su
until it says that there are no more packages to update.
* Install the GTK+ package and its dependencies.
To install the 32 bit (**i686**) GTK run the following command:
.. code-block:: console
pacman -S mingw-w64-i686-gtk3
The command for the 64 bit (**x86_64**) version is:
.. code-block:: console
pacman -S mingw-w64-x86_64-gtk3
The **x86_64** package cannot be installed in the 32 bit MSYS2!
* Close the shell:
.. code-block:: console
exit
* Now that all the GTK files needed by WeasyPrint are in the ``.\mingw32``
respectively in the ``.\mingw64`` subfolder of your MSYS2 installation directory,
we can (and must) make them accessible by injecting the appropriate folder into the
``PATH``.
Let's assume you installed MSYS2 in ``C:\msys2``. Then the folder to inject is:
* ``C:\msys2\mingw32\bin`` for the 32 bit GTK+
* ``C:\msys2\mingw64\bin`` for the 64 bit GTK+
You can either persist it through *Advanced System Settings* -- if you don't
know how to do that, read `How to set the path and environment variables in
Windows <https://www.computerhope.com/issues/ch000549.htm>`_ -- or
temporarily inject the folder before you run WeasyPrint.
.. _gtk64installer:
GTK+ 64 Bit Installer
""""""""""""""""""""""
If your Python is 64 bit you can use an installer extracted from MSYS2
and provided by Tom Schoonjans.
* Download and run the latest `gtk3-runtime-x.x.x-x-x-x-ts-win64.exe
<https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer>`_
* If you prefer to manage your ``PATH`` environment variable yourself you
should uncheck "Set up PATH environment variable to include GTK+" and supply
it later -- either persist it through *Advanced System Settings* or
temporarily inject it before you run WeasyPrint.
.. note::
Checking the option doesn't insert the GTK-path at the beginning of your
system ``PATH``, but rather **appends** it. If there is already another
(outdated) GTK on your ``PATH`` this will lead to unpleasant problems.
In any case: When executing WeasyPrint the GTK libraries must be on its ``PATH``.
Step 5 - Run WeasyPrint
~~~~~~~~~~~~~~~~~~~~~~~
Now that everything is in place you can test WeasyPrint.
Open a fresh *Command Prompt* and execute
.. code-block:: console
python -m weasyprint http://weasyprint.org weasyprint.pdf
If you get an error like ``OSError: dlopen() failed to load a library`` it's
probably because pango (or another GTK+ library mentioned in the error message)
is not properly available in the folders listed in your ``PATH`` environment
variable.
Since you didn't cheat and followed the instructions the up-to-date and
complete set of GTK libraries **must** be present and the error is an error.
Let's find out. Enter the following command:
.. code-block:: console
WHERE libpango-1.0-0.dll
WHERE zlib1.dll
This should respond with
*path\\to\\recently\\installed\\gtk\\binaries\\libpango-1.0-0.dll*, for example:
.. code-block:: console
C:\msys2\mingw64\bin\libpango-1.0-0.dll
C:\Program Files\GTK3-Runtime Win64\bin\zlib1.dll
If your system answers with *nothing found* or returns a filename not related
to your recently-installed-gtk or lists more than one location and the first
file in the list isn't actually in a subfolder of your recently-installed-gtk,
then we have caught the culprit.
Depending on the GTK installation route you took, the proper folder name is
something along the lines of:
* ``C:\msys2\mingw64\bin``
* ``C:\Program Files\GTK3-Runtime Win64\bin``
Determine the correct folder and execute the following commands, replace
``<path-to-recently-installed-gtk>`` accordingly:
.. code-block:: console
SET PROPER_GTK_FOLDER=<path-to-recently-installed-gtk>
SET PATH=%PROPER_GTK_FOLDER%;%PATH%
This puts the appropriate GTK at the beginning of your ``PATH`` and
its files are the first found when WeasyPrint requires them.
Call WeasyPrint again:
.. code-block:: console
python -m weasyprint http://weasyprint.org weasyprint.pdf
If the error is gone you should either fix your ``PATH`` permanently (via
*Advanced System Settings*) or execute the above ``SET PATH`` command by
default (once!) before you start using WeasyPrint.
If the error still occurs and if you really didn't cheat then you are allowed
to open a `new issue <https://github.com/Kozea/WeasyPrint/issues/new>`_. You
can also find extra help in this `bug report
<https://github.com/Kozea/WeasyPrint/issues/589>`_. If you cheated, then, you
know: Kittens already died.
Other Options for Installation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There is a .NET wrapper for WeasyPrint available `here
<https://github.com/balbarak/WeasyPrint-netcore>`_.

28
docs/support.rst Normal file
View File

@ -0,0 +1,28 @@
Support
=======
Sponsorship
-----------
With `donations and sponsorship`_, you help the projects to be
better. Donations allow the CourtBouillon team to have more time dedicated to
add new features, fix bugs, and improve documentation.
.. _donations and sponsorship: https://opencollective.com/courtbouillon
Professional Support
---------------------
You can improve your experience with CourtBouillon's tools thanks to our
professional support. You want bugs fixed as soon as possible? Your projects
would highly benefit from some new features? You or your team would like to get
new skills with one of the technologies we master?
Please contact us by mail_, by chat_ or by tweet_ to get in touch and find the
best way we can help you.
.. _mail: mailto:contact@courtbouillon.org
.. _chat: https://gitter.im/CourtBouillon/tinycss2
.. _tweet: https://twitter.com/BouillonCourt

View File

@ -1,328 +0,0 @@
Tips & Tricks
=============
This page presents some tips and tricks, mostly in the form of code snippets.
.. note::
These tips are primarily sourced from the community. You too can share your tricks with the community, just open a PR! (If you do so, don't forget to make your code readable for the others and add some context :).)
Include header and footer of arbitrary complexity in a PDF
----------------------------------------------------------
Why this snippet?
.................
Objective: Render a header and a footer of arbitrary complexity on every page of a PDF file.
Currently, WeasyPrint allows you to include simple information in the margin of each page (see the report in the library `examples <https://weasyprint.org/samples/>`_). This is possible thanks to CSS3 at-rules (syntax presentation `here <https://www.qhmit.com/css/at-rules/>`_). At-rules provide the ability to include characters in the margin of paged media. They are used to add things like page numbers or titles on the page.
Yet elements of arbitrary complexity can't be introduced in the margin. The :ref:`class <code>` in this snippet provides a way to include a header and/or a footer, however complex they are.
How to use this snippet?
........................
#. Alongside the main html file that you plan to export as a PDF, create a header html and/or a footer html.
#. Render the html files as strings, as you would normally do for your main html file. Then pass these strings to the class constructor under the names ``main_html``, ``header_html`` and ``footer_html``.
#. To get your PDF simply call the method ``render_pdf``.
.. note::
This constructor provides side margins with a sensible default of 2 centimeters. You can of course change the width of this margin if you want to. Likewise, you can change the default of 30 pixels between the header and footer elements and the core of the document.
How to write the header and footer?
...................................
For the HTML, the entire content of the header should be wrapped into a `header` tag and the content of the footer in a `footer` tag.
For the CSS, use fixed position and position the element yourself, either at the top for the header or the bottom for the footer.
Example CSS for a header:
.. code-block:: css
header {
position: fixed;
top: 0;
left: 0;
height: 2.5cm;
width: 100%;
background-color: #1a1a1a;
}
/* For the footer, replace `top: 0` by `bottom: 0` */
The html and css of the main page don't change.
.. _code:
Show me the code!
.................
.. code-block:: python
from weasyprint import HTML, CSS
class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to integrate nicely
    a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several intermediate steps.
      Here, in this class, we deal mostly with a box representation: 1 `Document` has 1 `Page`
      or more, each `Page` 1 `Box` or more. Each box can contain other boxes. Hence the
      recursive method `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal Weasyprint API. This
      snippet was written at the time of the release 47, it might break in the future.
    - This generator draws its inspiration and, also a bit of its implementation, from this
      discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
    """
    # Page rule used to lay out the header/footer documents: full A4 page, no margin.
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    def __init__(self, main_html, header_html=None, footer_html=None,
                 base_url=None, side_margin=2, extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string) which represents
            the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        base_url: str
            An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
            required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and the footer.
            The goal is to avoid having the content of `main_html` touching the header or the
            footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Render the header or the footer on its own and measure it.

        Parameters
        ----------
        element: str
            Either 'header' or 'footer'

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a html height

        Raises
        ------
        ValueError
            If `element` is neither 'header' nor 'footer'.
        """
        html = HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        element_page = element_doc.pages[0]
        element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
        element_body = element_body.copy_with_children(element_body.all_children())
        element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)

        if element == 'header':
            element_height = element_html.height
        elif element == 'footer':
            # The footer is positioned from the bottom of the page: its height is the
            # distance between its top edge and the bottom of the page.
            element_height = element_page.height - element_html.position_y
        else:
            # Previously this fell through and failed on an unbound local variable.
            raise ValueError(f'Unsupported overlay element: {element!r}')

        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
        """
        Insert the header and the footer in the main document.

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')

            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_pdf(self):
        """
        Returns
        -------
        pdf: a bytes sequence
            The rendered PDF.
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element('header')
        else:
            header_body, header_height = None, 0
        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element('footer')
        else:
            footer_body, footer_height = None, 0

        # Reserve room for the overlays: top/bottom margins are the measured overlay
        # heights plus the extra vertical margin; left/right use the side margin.
        margins = '{header_size}px {side_margin} {footer_size}px {side_margin}'.format(
            header_size=header_height + self.extra_vertical_margin,
            footer_size=footer_height + self.extra_vertical_margin,
            side_margin=f'{self.side_margin}cm',
        )
        content_print_layout = '@page {size: A4 portrait; margin: %s;}' % margins

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        pdf = main_doc.write_pdf()

        return pdf

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
        box which is named `element`.

        The boxes are searched depth-first, in document order. Returns the first box whose
        `element_tag` equals `element`, or None when no box matches.

        Look at the notes of the class for more details on Weasyprint insides.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            # Only stop when the subtree actually contains the element; otherwise keep
            # looking in the following siblings instead of giving up after the first box.
            match = PdfGenerator.get_element(box.all_children(), element)
            if match is not None:
                return match
        return None
.. note::
In the `CSS Generated Content for Paged Media Module <https://www.w3.org/TR/css-gcpm-3/>`_, the W3C proposed standards to support most expected features for print media. `Running elements <https://www.w3.org/TR/css-gcpm-3/#running-elements>`_ are the CSS compliant solution to this problem. See this `issue on the project <https://github.com/Kozea/WeasyPrint/issues/92>`_ for more details for a possible implementation.
Edit the generated PDF using WeasyPrint's PDF editor
----------------------------------------------------
Why this snippet?
.................
You may want to edit the PDF generated by WeasyPrint, for example to add PDF features that are not supported by CSS properties.
WeasyPrint includes a very simple and limited PDF editor that can be used in this case. This PDF editor only works with documents generated by WeasyPrint.
In this example, we will set the magnification to "Fit page", so that the PDF size automatically fits in the PDF reader window when open.
How to use this snippet?
........................
You can use the code below as a simple Python script. Change the URL you want to render and the path of the generated PDF to fit your needs.
If you want to add other features, you will have to read the PDF specification!
Show me the code!
.................
.. code-block:: python
from io import BytesIO
from weasyprint import HTML
from weasyprint.pdf import PDFFile, pdf_format
# Render the page to PDF bytes and load them into WeasyPrint's PDF editor.
html = HTML('http://weasyprint.org/')
content = BytesIO(html.write_pdf())
pdf_file = PDFFile(content)
# Add an /OpenAction entry to the PDF catalog, then finalize the edited file.
params = pdf_format('/OpenAction [0 /FitV null]')
pdf_file.extend_dict(pdf_file.catalog, params)
pdf_file.finish()
pdf = pdf_file.fileobj.getvalue()
# Use a context manager so the output file is flushed and closed deterministically.
with open('/tmp/weasyprint.pdf', 'wb') as fd:
    fd.write(pdf)
Display forms
-------------
Why this snippet?
.................
Contrary to many browsers, WeasyPrint doesn't render form inputs using a custom
toolkit. As there's no dedicated stylesheet for them, they're often not
rendered at all.
Forms could also be rendered in generated PDF files, but it's not supported yet
(see issue `#61 <https://github.com/Kozea/WeasyPrint/issues/61>`_).
The easiest way to render inputs is to use a dedicated stylesheet.
How to use this snippet?
........................
Adapt and include the sample into your document stylesheets.
Show me the code!
.................
.. code-block:: css
input, textarea {
background: #eee;
border: 0.01em solid;
display: block;
margin: 0.2em 0;
}
[disabled] {
opacity: 0.3;
}
input[type=text] {
height: 1.2em;
width: 20em;
}
input[type=text]::before {
content: attr(value);
padding: 0.2em;
}
input[type=radio], input[type=checkbox] {
box-sizing: border-box;
background-clip: content-box;
height: 1em;
padding: 0.1em;
width: 1em;
}
input[checked] {
background-color: red;
}
input[type=radio] {
border-radius: 100%;
}
textarea {
font-family: monospace;
padding: 0.5em;
width: 20em;
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

View File

@ -13,12 +13,16 @@ home-page = 'https://www.courtbouillon.org/weasyprint'
requires = [
'pydyf >=0.0.1',
'cffi >=0.6',
'html5lib >=0.999999999',
'html5lib >=1.0.1',
'tinycss2 >=1.0.0',
'cssselect2 >=0.1',
'Pyphen >=0.9.1',
'fonttools[woff] >=4.0',
'Pillow >=4.0.0',
# We could use fonttools[woff], but see
# https://github.com/fonttools/fonttools/issues/2188
'fonttools >=4.0.0',
'brotli >=1.0.1; platform_python_implementation == "CPython"',
'brotlicffi >=0.8.0; platform_python_implementation != "CPython"',
]
requires-python = '>=3.6'
keywords = 'html css pdf converter'

View File

@ -308,17 +308,10 @@ def test_command_line_render(tmpdir):
py.path.local(resource_filename('')).chdir()
# Reference
html_obj = FakeHTML(string=combined, base_url='dummy.html')
# pdf_bytes = html_obj.write_pdf()
png_bytes = html_obj.write_png()
x2_png_bytes = html_obj.write_png(resolution=192)
rotated_png_bytes = FakeHTML(
pdf_bytes = html_obj.write_pdf()
rotated_pdf_bytes = FakeHTML(
string=combined, base_url='dummy.html',
media_type='screen').write_png()
empty_png_bytes = FakeHTML(
string=b'<style>' + css + b'</style>').write_png()
check_png_pattern(png_bytes)
check_png_pattern(rotated_png_bytes, rotated=True)
check_png_pattern(empty_png_bytes, blank=True)
media_type='screen').write_pdf()
tmpdir.chdir()
with open(resource_filename('pattern.png'), 'rb') as pattern_fd:
@ -331,83 +324,73 @@ def test_command_line_render(tmpdir):
tmpdir.join('linked.html').write_binary(linked)
tmpdir.join('style.css').write_binary(css)
_run('combined.html out1.png')
_run('combined.html out2.pdf')
assert tmpdir.join('out1.png').read_binary() == png_bytes
# TODO: check PDF content? How?
# assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes
assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes
_run('combined-UTF-16BE.html out3.png --encoding UTF-16BE')
assert tmpdir.join('out3.png').read_binary() == png_bytes
_run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE')
assert tmpdir.join('out3.pdf').read_binary() == pdf_bytes
_run(tmpdir.join('combined.html').strpath + ' out4.png')
assert tmpdir.join('out4.png').read_binary() == png_bytes
_run(tmpdir.join('combined.html').strpath + ' out4.pdf')
assert tmpdir.join('out4.pdf').read_binary() == pdf_bytes
_run(path2url(tmpdir.join('combined.html').strpath) + ' out5.png')
assert tmpdir.join('out5.png').read_binary() == png_bytes
_run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf')
assert tmpdir.join('out5.pdf').read_binary() == pdf_bytes
_run('linked.html --debug out6.png') # test relative URLs
assert tmpdir.join('out6.png').read_binary() == png_bytes
_run('linked.html --debug out6.pdf') # test relative URLs
assert tmpdir.join('out6.pdf').read_binary() == pdf_bytes
_run('combined.html --verbose out7 -f png')
_run('combined.html --quiet out8 --format pdf')
assert tmpdir.join('out7').read_binary() == png_bytes
# assert tmpdir.join('out8').read_binary(), pdf_bytes
_run('combined.html --verbose out7')
_run('combined.html --quiet out8')
assert tmpdir.join('out7').read_binary() == pdf_bytes
assert tmpdir.join('out8').read_binary() == pdf_bytes
_run('no_css.html out9.png')
_run('no_css.html out10.png -s style.css')
assert tmpdir.join('out9.png').read_binary() != png_bytes
# assert tmpdir.join('out10.png').read_binary() == png_bytes
_run('no_css.html out9.pdf')
_run('no_css.html out10.pdf -s style.css')
assert tmpdir.join('out9.pdf').read_binary() != pdf_bytes
assert tmpdir.join('out10.pdf').read_binary() == pdf_bytes
stdout = _run('--format png combined.html -')
assert stdout == png_bytes
stdout = _run('combined.html -')
assert stdout == pdf_bytes
_run('- out11.png', stdin=combined)
check_png_pattern(tmpdir.join('out11.png').read_binary())
assert tmpdir.join('out11.png').read_binary() == png_bytes
_run('- out11.pdf', stdin=combined)
assert tmpdir.join('out11.pdf').read_binary() == pdf_bytes
stdout = _run('--format png - -', stdin=combined)
assert stdout == png_bytes
stdout = _run('- -', stdin=combined)
assert stdout == pdf_bytes
_run('combined.html out13.png --media-type screen')
_run('combined.html out12.png -m screen')
_run('linked.html out14.png -m screen')
assert tmpdir.join('out12.png').read_binary() == rotated_png_bytes
assert tmpdir.join('out13.png').read_binary() == rotated_png_bytes
assert tmpdir.join('out14.png').read_binary() == rotated_png_bytes
_run('combined.html out13.pdf --media-type screen')
_run('combined.html out12.pdf -m screen')
_run('linked.html out14.pdf -m screen')
assert tmpdir.join('out12.pdf').read_binary() == rotated_pdf_bytes
assert tmpdir.join('out13.pdf').read_binary() == rotated_pdf_bytes
assert tmpdir.join('out14.pdf').read_binary() == rotated_pdf_bytes
stdout = _run('-f pdf combined.html -')
stdout = _run('combined.html -')
assert stdout.count(b'attachment') == 0
stdout = _run('-f pdf combined.html -')
stdout = _run('combined.html -')
assert stdout.count(b'attachment') == 0
stdout = _run('-f pdf -a pattern.png combined.html -')
stdout = _run('-a pattern.png combined.html -')
assert stdout.count(b'attachment') == 1
stdout = _run('-f pdf -a style.css -a pattern.png combined.html -')
stdout = _run('-a style.css -a pattern.png combined.html -')
assert stdout.count(b'attachment') == 2
stdout = _run('-f png -r 192 linked.html -')
assert stdout == x2_png_bytes
stdout = _run('-f png --resolution 192 linked.html -')
assert _run('linked.html - -f png --resolution 192') == x2_png_bytes
assert stdout == x2_png_bytes
os.mkdir('subdirectory')
py.path.local('subdirectory').chdir()
with capture_logs() as logs:
stdout = _run('--format png - -', stdin=combined)
stdout = _run('- -', stdin=combined)
assert len(logs) == 1
assert logs[0].startswith('ERROR: Failed to load image')
assert stdout == empty_png_bytes
assert stdout.startswith(b'%PDF')
with capture_logs() as logs:
stdout = _run('--format png --base-url= - -', stdin=combined)
stdout = _run('--base-url= - -', stdin=combined)
assert len(logs) == 1
assert logs[0].startswith(
'ERROR: Relative URI reference without a base URI')
assert stdout == empty_png_bytes
assert stdout.startswith(b'%PDF')
stdout = _run('--format png --base-url .. - -', stdin=combined)
assert stdout == png_bytes
stdout = _run('--base-url .. - -', stdin=combined)
assert stdout == pdf_bytes
with pytest.raises(SystemExit):
_run('--info')
@ -415,15 +398,6 @@ def test_command_line_render(tmpdir):
with pytest.raises(SystemExit):
_run('--version')
with pytest.raises(SystemExit):
_run('combined.html combined.jpg')
with pytest.raises(SystemExit):
_run('combined.html combined.pdf --resolution 100')
with pytest.raises(SystemExit):
_run('combined.html combined.png -a pattern.png')
@assert_no_logs
def test_unicode_filenames(tmpdir):
@ -837,6 +811,8 @@ def test_url_fetcher():
'url(weasyprint-custom:foo/é_%e9_pattern)">')
test('<link rel=stylesheet href="weasyprint-custom:foo/bar.css"><body>')
test('<style>@import "weasyprint-custom:foo/bar.css";</style><body>')
test('<style>@import url(weasyprint-custom:foo/bar.css);</style><body>')
test('<style>@import url("weasyprint-custom:foo/bar.css");</style><body>')
test('<link rel=stylesheet href="weasyprint-custom:foo/bar.css"><body>')
with capture_logs() as logs:

View File

@ -391,7 +391,7 @@ def test_expand_list_style_invalid(rule):
def assert_background(css, **expected):
"""Helper checking the background properties."""
expanded = expand_to_dict('background: ' + css)
expanded = expand_to_dict(f'background: {css}')
assert expanded.pop('background_color') == expected.pop(
'background_color', INITIAL_VALUES['background_color'])
nb_layers = len(expanded['background_image'])
@ -524,7 +524,7 @@ def test_expand_background_position():
"""Test the ``background-position`` property."""
def position(css, *expected):
[(name, [value])] = expand_to_dict(
'background-position:' + css).items()
f'background-position: {css}').items()
assert name == 'background_position'
assert value == expected
for css_x, val_x in [

View File

@ -15,7 +15,7 @@ from . import assert_pixels
@assert_no_logs
@pytest.mark.parametrize(
'name, expected_width, expected_height, expected_pixels, html', (
('all_blue', 10, 10, (10 * (10 * 'B' + "\n")), '''
('all_blue', 10, 10, (10 * (10 * 'B' + '\n')), '''
<style>
@page { size: 10px }
/* bodys background propagates to the whole canvas */
@ -490,7 +490,7 @@ def test_background_image(name, css, pixels):
# BBBB
# BBBB
assert_pixels('background_' + name, 14, 16, pixels, '''
assert_pixels(f'background_{name}', 14, 16, pixels, '''
<style>
@page { size: 14px 16px }
html { background: #fff }
@ -531,7 +531,7 @@ def test_background_image_zero_size_background():
def test_background_origin():
"""Test the background-origin property."""
def test_value(value, pixels, css=None):
assert_pixels('background_origin_' + value, 12, 12, pixels, '''
assert_pixels(f'background_origin_{value}', 12, 12, pixels, '''
<style>
@page { size: 12px }
html { background: #fff }
@ -843,7 +843,7 @@ def test_background_repeat_round_4():
'''),
))
def test_background_clip(value, pixels):
assert_pixels('background_clip_' + value, 8, 8, pixels, '''
assert_pixels(f'background_clip_{value}', 8, 8, pixels, '''
<style>
@page { size: 8px }
html { background: #fff }

View File

@ -106,7 +106,7 @@ table = '''
))
def test_images(filename, image):
# TODO: fails because of missing SVG support
assert_pixels('inline_image_' + filename, 8, 8, image, '''
assert_pixels(f'inline_image_{filename}', 8, 8, image, '''
<style>
@page { size: 8px }
body { margin: 2px 0 0 2px; background: #fff; font-size: 0 }

View File

@ -49,7 +49,7 @@ from . import assert_pixels
''')
))
def test_list_style_image(position, pixels):
assert_pixels('list_style_image_' + position, 12, 10, pixels, '''
assert_pixels(f'list_style_image_{position}', 12, 10, pixels, '''
<style>
@page { size: 12px 10px }
body { margin: 0; background: white; font-family: %s }

View File

@ -6,6 +6,8 @@
"""
import pytest
from . import assert_pixels
@ -116,3 +118,349 @@ def test_text_align_rtl_trailing_whitespace():
<p style="direction: ltr"> abc </p>
<p style="direction: ltr"> &#8207;abc </p>
''')
# Draw test: a blue paragraph with ``max-lines: 2`` and ``block-ellipsis: auto``
# is rendered on a 10x10 px page with a 2px font, and the result is compared
# against the expected pixel grid (first argument after the size).
def test_max_lines_ellipsis():
assert_pixels('max_lines_ellipsis', 10, 10, '''
BBBBBBBB__
BBBBBBBB__
BBBBBBBBBB
BBBBBBBBBB
__________
__________
__________
__________
__________
__________
''', '''
<style>
@page {size: 10px 10px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
p {
block-ellipsis: auto;
color: blue;
font-family: weasyprint;
font-size: 2px;
max-lines: 2;
}
</style>
<p>
abcd efgh ijkl
</p>
''')
@pytest.mark.xfail
def test_max_lines_nested():
assert_pixels('max_lines_nested', 10, 12, '''
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
rrrrrrrrrr
rrrrrrrrrr
rrrrrrrrrr
rrrrrrrrrr
BBBBBBBBBB
BBBBBBBBBB
__________
__________
''', '''
<style>
@page {size: 10px 12px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
div {
continue: discard;
font-family: weasyprint;
font-size: 2px;
}
#a {
color: blue;
max-lines: 5;
}
#b {
color: red
max-lines: 2;
}
</style>
<div id=a>
aaaaa
aaaaa
<div id=b>
bbbbb
bbbbb
bbbbb
bbbbb
</div>
aaaaa
aaaaa
</div>
''')
# Draw test: a blue paragraph of eight short lines styled with
# ``line-clamp: 3 "(…)"`` is rendered on a 10x10 px page with a 2px font and
# compared against the expected pixel grid.
def test_line_clamp():
assert_pixels('line_clamp', 10, 10, '''
BBBB__BB__
BBBB__BB__
BBBB__BB__
BBBB__BB__
BBBBBBBBBB
BBBBBBBBBB
__________
__________
__________
__________
''', '''
<style>
@page {size: 10px 10px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
p {
color: blue;
font-family: weasyprint;
font-size: 2px;
line-clamp: 3 "(…)";
}
</style>
<p>
aa a
bb b
cc c
dddd
eeee
ffff
gggg
hhhh
</p>
''')
# Draw test (expected to fail for now): a blue ``div`` with
# ``block-ellipsis: auto`` and ``continue: discard`` wraps six ``<p>aaa</p>``
# paragraphs on a 10x10 px page; the rendering is compared against the
# expected pixel grid, whose last text line is wider than the others.
@pytest.mark.xfail
def test_ellipsis_nested():
assert_pixels('ellipsis_nested', 10, 10, '''
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBB____
BBBBBBBB__
BBBBBBBB__
''', '''
<style>
@page {size: 10px 10px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
div {
block-ellipsis: auto;
color: blue;
continue: discard;
font-family: weasyprint;
font-size: 2px;
}
</style>
<div>
<p>aaa</p>
<p>aaa</p>
<p>aaa</p>
<p>aaa</p>
<p>aaa</p>
<p>aaa</p>
</div>
''')
# Draw test: the red text "a c e" in a ``text-align: right`` div (1px margin,
# 2px font) is rendered on a white 9x6 px page and compared against the
# expected pixel grid.
def test_text_align_right():
assert_pixels('text_align_right', 9, 6, '''
_________
__RR__RR_
__RR__RR_
______RR_
______RR_
_________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 9px 6px;
background: white;
}
body {
color: red;
font-family: weasyprint;
font-size: 2px;
}
div {
line-height: 1;
margin: 1px;
text-align: right;
}
</style>
<div>a c e</div>''')
# Draw test: the red text "a c e" in a ``text-align: justify`` div (1px margin,
# 2px font) is rendered on a white 9x6 px page and compared against the
# expected pixel grid.
def test_text_align_justify():
assert_pixels('text_align_justify', 9, 6, '''
_________
_RR___RR_
_RR___RR_
_RR______
_RR______
_________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 9px 6px;
background: white;
}
body {
color: red;
font-family: weasyprint;
font-size: 2px;
}
div {
line-height: 1;
margin: 1px;
text-align: justify;
}
</style>
<div>a c e</div>''')
# Draw test: the red text "a c e" with ``word-spacing: 1em`` (2px font) is
# rendered on a white 19x4 px page and compared against the expected pixel
# grid.
def test_text_word_spacing():
assert_pixels('text_word_spacing', 19, 4, '''
___________________
_RR____RR____RR____
_RR____RR____RR____
___________________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 19px 4px;
background: white;
}
body {
color: red;
font-family: weasyprint;
font-size: 2px;
}
div {
line-height: 1;
margin: 1px;
word-spacing: 1em;
}
</style>
<div>a c e</div>''')
# Draw test: the red text "ace" with ``letter-spacing: 2em`` (2px font) is
# rendered on a white 19x4 px page and compared against the expected pixel
# grid, which is the same grid as in the word-spacing test.
def test_text_letter_spacing():
assert_pixels('text_letter_spacing', 19, 4, '''
___________________
_RR____RR____RR____
_RR____RR____RR____
___________________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 19px 4px;
background: white;
}
body {
color: red;
font-family: weasyprint;
font-size: 2px;
}
div {
line-height: 1;
margin: 1px;
letter-spacing: 2em;
}
</style>
<div>ace</div>''')
# Draw test: the red text "abc" with ``text-decoration: underline blue`` (3px
# font) is rendered on a white 13x7 px page with a 2px margin and compared
# against the expected pixel grid; the blue row sits below the red rows.
def test_text_underline():
assert_pixels('text_underline', 13, 7, '''
_____________
_zzzzzzzzzzz_
_zRRRRRRRRRz_
_zRRRRRRRRRz_
_zBBBBBBBBBz_
_zzzzzzzzzzz_
_____________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 13px 7px;
background: white;
margin: 2px;
}
body {
color: red;
font-family: weasyprint;
font-size: 3px;
text-decoration: underline blue;
}
</style>
<div>abc</div>''')
# Draw test: the red text "abc" with ``text-decoration: overline blue`` (3px
# font) is rendered on a white 13x7 px page with a 2px margin and compared
# against the expected pixel grid.
def test_text_overline():
# Ascent value seems to be a bit random, don't try to get the exact
# position of the line
assert_pixels('text_overline', 13, 7, '''
_____________
_zzzzzzzzzzz_
_zzzzzzzzzzz_
_zRRRRRRRRRz_
_zRRRRRRRRRz_
_zzzzzzzzzzz_
_____________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 13px 7px;
background: white;
margin: 2px;
}
body {
color: red;
font-family: weasyprint;
font-size: 3px;
text-decoration: overline blue;
}
</style>
<div>abc</div>''')
# Draw test: the red text "abc" with ``text-decoration: line-through blue``
# (3px font) is rendered on a white 13x7 px page with a 2px margin and compared
# against the expected pixel grid; the blue row sits between the red rows.
def test_text_line_through():
assert_pixels('text_line_through', 13, 7, '''
_____________
_zzzzzzzzzzz_
_zRRRRRRRRRz_
_zBBBBBBBBBz_
_zRRRRRRRRRz_
_zzzzzzzzzzz_
_____________
''', '''
<style>
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
@page {
size: 13px 7px;
background: white;
margin: 2px;
}
body {
color: red;
font-family: weasyprint;
font-size: 3px;
text-decoration: line-through blue;
}
</style>
<div>abc</div>''')

View File

@ -775,3 +775,62 @@ def test_box_margin_top_repagination():
div, h1 = body.children
assert div.margin_top == 0
assert div.padding_box_y() == 0
# Layout test: an ``article`` with ``continue: discard`` and a 1px border holds
# six 25px-high inline-block divs on an 80px page. The single page returned by
# ``parse`` must keep only the first three divs and still draw the bottom
# border.
@assert_no_logs
def test_continue_discard():
page_1, = parse('''
<style>
@page { size: 80px; margin: 0 }
div { display: inline-block; width: 100%; height: 25px }
article { continue: discard; border: 1px solid; line-height: 1 }
</style>
<article>
<div>a</div>
<div>b</div>
<div>c</div>
<div>d</div>
<div>e</div>
<div>f</div>
</article>''')
html, = page_1.children
body, = html.children
article, = body.children
assert article.height == 3 * 25
# Unpacking into exactly three names also asserts that only three divs remain.
div_1, div_2, div_3 = article.children
# Positions are offset by the 1px top border of the article.
assert div_1.position_y == 1
assert div_2.position_y == 1 + 25
assert div_3.position_y == 1 + 25 * 2
assert article.border_bottom_width == 1
# Layout test: same setup as a ``continue: discard`` article, but the six
# 25px-high divs are wrapped in a bordered ``section`` child. Only three divs
# must remain in the section, and the article height includes the section's
# two 1px borders.
@assert_no_logs
def test_continue_discard_children():
page_1, = parse('''
<style>
@page { size: 80px; margin: 0 }
div { display: inline-block; width: 100%; height: 25px }
section { border: 1px solid }
article { continue: discard; border: 1px solid; line-height: 1 }
</style>
<article>
<section>
<div>a</div>
<div>b</div>
<div>c</div>
<div>d</div>
<div>e</div>
<div>f</div>
</section>
</article>''')
html, = page_1.children
body, = html.children
article, = body.children
assert article.height == 2 + 3 * 25
section, = article.children
assert section.height == 3 * 25
# Unpacking into exactly three names also asserts that only three divs remain.
div_1, div_2, div_3 = section.children
# Positions are offset by the 1px top borders of the article and the section.
assert div_1.position_y == 2
assert div_2.position_y == 2 + 25
assert div_3.position_y == 2 + 25 * 2
assert article.border_bottom_width == 1

View File

@ -8,7 +8,7 @@
import pytest
from weasyprint.css.properties import INITIAL_VALUES
from weasyprint.text import split_first_line
from weasyprint.text.line_break import split_first_line
from .test_boxes import render_pages
from .testing_utils import MONO_FONTS, SANS_FONTS, assert_no_logs
@ -410,7 +410,7 @@ def test_letter_spacing_1():
'<style>'
' strong {'
' letter-spacing: 11px;'
' max-width: ' + str(strong_3.width * 1.5) + 'px'
f' max-width: {strong_3.width * 1.5}px'
'}'
' span { display: inline-block }'
'</style>'
@ -1073,3 +1073,59 @@ def test_leader_content(leader, content):
after, = line.children
inline, = after.children
assert inline.children[0].text == content
# Layout test (expected to fail for now): a paragraph with ``max-lines: 2``
# containing three words is expected to produce two boxes in the body, the
# first with two lines ("abcd", "efgh") and the second with one line ("ijkl").
@pytest.mark.xfail
@assert_no_logs
def test_max_lines():
page, = render_pages('''
<style>
@page {size: 10px 10px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
p {
font-family: weasyprint;
font-size: 2px;
max-lines: 2;
}
</style>
<p>
abcd efgh ijkl
</p>
''')
html, = page.children
body, = html.children
p1, p2 = body.children
line1, line2 = p1.children
line3, = p2.children
text1, = line1.children
text2, = line2.children
text3, = line3.children
assert text1.text == 'abcd'
assert text2.text == 'efgh'
assert text3.text == 'ijkl'
# Layout test: a ``div`` with ``continue: discard`` holding three words is laid
# out on a 10x4 px page with a 2px font. A single page must be produced, and
# the div must contain exactly two lines, "abcd" and "efgh".
@assert_no_logs
def test_continue():
page, = render_pages('''
<style>
@page {size: 10px 4px;}
@font-face {src: url(weasyprint.otf); font-family: weasyprint}
div {
continue: discard;
font-family: weasyprint;
font-size: 2px;
}
</style>
<div>
abcd efgh ijkl
</div>
''')
html, = page.children
body, = html.children
# ``p`` is the <div> box; the name is kept as in the original test.
p, = body.children
line1, line2 = p.children
text1, = line1.children
text2, = line2.children
assert text1.text == 'abcd'
assert text2.text == 'efgh'

View File

@ -1 +0,0 @@
52.2

View File

@ -30,10 +30,11 @@ if hasattr(sys, 'frozen'): # pragma: no cover
else:
ROOT = Path(os.path.dirname(__file__))
VERSION = __version__ = (ROOT / 'VERSION').read_text().strip()
VERSION = __version__ = '53.0'
__all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page',
'default_url_fetcher', 'VERSION']
__all__ = [
'HTML', 'CSS', 'Attachment', 'Document', 'Page', 'default_url_fetcher',
'VERSION', '__version__']
# Import after setting the version, as the version is used in other modules
@ -57,12 +58,10 @@ class HTML:
:type filename: str or pathlib.Path
:param filename: A filename, relative to the current directory, or
absolute.
:type url: str
:param url: An absolute, fully qualified URL.
:param str url: An absolute, fully qualified URL.
:type file_obj: :term:`file object`
:param file_obj: Any object with a ``read`` method.
:type string: str
:param string: A string of HTML source.
:param str string: A string of HTML source.
Specifying multiple inputs is an error:
``HTML(filename="foo.html", url="localhost://bar.html")``
@ -70,20 +69,17 @@ class HTML:
You can also pass optional named arguments:
:type encoding: str
:param encoding: Force the source character encoding.
:type base_url: str
:param base_url: The base used to resolve relative URLs
:param str encoding: Force the source character encoding.
:param str base_url: The base used to resolve relative URLs
(e.g. in ``<img src="../foo.png">``). If not provided, try to use
the input filename, URL, or ``name`` attribute of :term:`file objects
<file object>`.
:type url_fetcher: function
:type url_fetcher: :term:`function`
:param url_fetcher: A function or other callable
with the same signature as :func:`default_url_fetcher` called to
fetch external resources such as stylesheets and images.
(See :ref:`url-fetchers`.)
:type media_type: str
:param media_type: The media type to use for ``@media``.
(See :ref:`URL Fetchers`.)
:param str media_type: The media type to use for ``@media``.
Defaults to ``'print'``. **Note:** In some cases like
``HTML(string=foo)`` relative URLs will be invalid if ``base_url``
is not provided.
@ -128,29 +124,26 @@ class HTML:
"""Lay out and paginate the document, but do not (yet) export it
to PDF or PNG.
This returns a :class:`~document.Document` object which provides
This returns a :class:`document.Document` object which provides
access to individual pages and various meta-data.
See :meth:`write_pdf` to get a PDF directly.
.. versionadded:: 0.15
:type stylesheets: list
:param stylesheets:
:param list stylesheets:
An optional list of user stylesheets. List elements are
:class:`CSS` objects, filenames, URLs, or file
objects. (See :ref:`stylesheet-origins`.)
:type presentational_hints: bool
:param presentational_hints: Whether HTML presentational hints are
followed.
:type optimize_images: bool
:param optimize_images: Try to optimize the size of embedded images.
:type font_config: :class:`~fonts.FontConfiguration`
objects. (See :ref:`Stylesheet Origins`.)
:param bool presentational_hints:
Whether HTML presentational hints are followed.
:param bool optimize_images:
Try to optimize the size of embedded images.
:type font_config: :class:`text.fonts.FontConfiguration`
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`~css.counters.CounterStyle`
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:type image_cache: dict
:param image_cache: A dictionary used to cache images.
:returns: A :class:`~document.Document` object.
:param dict image_cache: A dictionary used to cache images.
:returns: A :class:`document.Document` object.
"""
return Document._render(
@ -166,36 +159,32 @@ class HTML:
This is a shortcut for calling :meth:`render`, then
:meth:`Document.write_pdf() <document.Document.write_pdf>`.
:type target: str, pathlib.Path or file object
:type target:
:class:`str`, :class:`pathlib.Path` or :term:`file object`
:param target:
A filename where the PDF file is generated, a file object, or
:obj:`None`.
:type stylesheets: list
:param stylesheets:
:param list stylesheets:
An optional list of user stylesheets. The list's elements
are :class:`CSS` objects, filenames, URLs, or file-like
objects. (See :ref:`stylesheet-origins`.)
:type zoom: float
:param zoom:
objects. (See :ref:`Stylesheet Origins`.)
:param float zoom:
The zoom factor in PDF units per CSS units. **Warning**:
All CSS units are affected, including physical units like
``cm`` and named sizes like ``A4``. For values other than
1, the physical CSS units will thus be "wrong".
:type attachments: list
:param attachments: A list of additional file attachments for the
:param list attachments: A list of additional file attachments for the
generated PDF document or :obj:`None`. The list's elements are
:class:`Attachment` objects, filenames, URLs or file-like objects.
:type presentational_hints: bool
:param presentational_hints: Whether HTML presentational hints are
:param bool presentational_hints: Whether HTML presentational hints are
followed.
:type optimize_images: bool
:param optimize_images: Try to optimize the size of embedded images.
:type font_config: :class:`~fonts.FontConfiguration`
:param bool optimize_images:
Try to optimize the size of embedded images.
:type font_config: :class:`text.fonts.FontConfiguration`
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`~css.counters.CounterStyle`
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:type image_cache: dict
:param image_cache: A dictionary used to cache images.
:param dict image_cache: A dictionary used to cache images.
:returns:
The PDF as :obj:`bytes` if ``target`` is not provided or
:obj:`None`, otherwise :obj:`None` (the PDF is written to
@ -217,11 +206,11 @@ class CSS:
arguments.
An additional argument called ``font_config`` must be provided to handle
``@font-face`` rules. The same ``fonts.FontConfiguration`` object must be
used for different ``CSS`` objects applied to the same document.
``@font-face`` rules. The same ``text.fonts.FontConfiguration`` object
must be used for different ``CSS`` objects applied to the same document.
``CSS`` objects have no public attributes or methods. They are only meant
to be used in the :meth:`~HTML.write_pdf` and :meth:`~HTML.render` methods
to be used in the :meth:`HTML.write_pdf` and :meth:`HTML.render` methods
of :class:`HTML` objects.
"""

View File

@ -14,7 +14,7 @@ import sys
import pydyf
from . import HTML, LOGGER, __version__
from .text import pango
from .text.ffi import pango
class PrintInfo(argparse.Action):
@ -57,7 +57,7 @@ def main(argv=None, stdout=None, stdin=None):
.. option:: -s <filename_or_URL>, --stylesheet <filename_or_URL>
Filename or URL of a user cascading stylesheet (see
:ref:`stylesheet-origins`) to add to the document
:ref:`Stylesheet Origins`) to add to the document
(e.g. ``-s print.css``). Multiple stylesheets are allowed.
.. option:: -m <type>, --media-type <type>
@ -120,17 +120,11 @@ def main(argv=None, stdout=None, stdin=None):
help='Print system information and exit.')
parser.add_argument('-e', '--encoding',
help='Character encoding of the input')
parser.add_argument('-f', '--format', choices=['pdf', 'png'],
help='Output format. Can be omitted if `output` '
'ends with a .pdf or .png extension.')
parser.add_argument('-s', '--stylesheet', action='append',
help='URL or filename for a user CSS stylesheet. '
'May be given multiple times.')
parser.add_argument('-m', '--media-type', default='print',
help='Media type to use for @media, defaults to print')
parser.add_argument('-r', '--resolution', type=float,
help='PNG only: the resolution in pixel per CSS inch. '
'Defaults to 96, one PNG pixel per CSS pixel.')
parser.add_argument('-u', '--base-url',
help='Base for relative URLs in the HTML input. '
"Defaults to the input's own filename or URL "
@ -155,19 +149,6 @@ def main(argv=None, stdout=None, stdin=None):
args = parser.parse_args(argv)
if args.format is None:
output_lower = args.output.lower()
if output_lower.endswith('.pdf'):
format_ = 'pdf'
elif output_lower.endswith('.png'):
format_ = 'png'
else:
parser.error(
'Either specify a format with -f or choose an '
'output filename that ends in .pdf or .png')
else:
format_ = args.format.lower()
if args.input == '-':
source = stdin or sys.stdin.buffer
if args.base_url is None:
@ -185,18 +166,8 @@ def main(argv=None, stdout=None, stdin=None):
kwargs = {
'stylesheets': args.stylesheet,
'presentational_hints': args.presentational_hints,
'optimize_images': args.optimize_images}
if args.resolution:
if format_ == 'png':
kwargs['resolution'] = args.resolution
else:
parser.error('--resolution only applies for the PNG format.')
if args.attachment:
if format_ == 'pdf':
kwargs['attachments'] = args.attachment
else:
parser.error('--attachment only applies for the PDF format.')
'optimize_images': args.optimize_images,
'attachments': args.attachment}
# Default to logging to stderr.
if args.debug:
@ -210,7 +181,7 @@ def main(argv=None, stdout=None, stdin=None):
html = HTML(source, base_url=args.base_url, encoding=args.encoding,
media_type=args.media_type)
getattr(html, 'write_' + format_)(output, **kwargs)
html.write_pdf(output, **kwargs)
if __name__ == '__main__': # pragma: no cover

View File

@ -26,7 +26,7 @@ from ..logger import LOGGER, PROGRESS_LOGGER
from ..urls import URLFetchingError, get_url_attribute, url_join
from . import computed_values, counters, media_queries
from .properties import INHERITED, INITIAL_NOT_COMPUTED, INITIAL_VALUES
from .utils import remove_whitespace
from .utils import get_url, remove_whitespace
from .validation import preprocess_declarations
from .validation.descriptors import preprocess_descriptors
@ -127,12 +127,12 @@ class StyleFor:
style['border_collapse'] == 'collapse'):
# Padding does not apply
for side in ['top', 'bottom', 'left', 'right']:
style['padding_' + side] = computed_values.ZERO_PIXELS
style[f'padding_{side}'] = computed_values.ZERO_PIXELS
if (style['display'].startswith('table-') and
style['display'] != 'table-caption'):
# Margins do not apply
for side in ['top', 'bottom', 'left', 'right']:
style['margin_' + side] = computed_values.ZERO_PIXELS
style[f'margin_{side}'] = computed_values.ZERO_PIXELS
return style
@ -830,9 +830,18 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
continue
tokens = remove_whitespace(rule.prelude)
if tokens and tokens[0].type in ('url', 'string'):
url = tokens[0].value
else:
url = None
if tokens:
if tokens[0].type == 'string':
url = url_join(
base_url, tokens[0].value, allow_relative=False,
context='@import at %s:%s',
context_args=(rule.source_line, rule.source_column))
else:
url_tuple = get_url(tokens[0], base_url)
if url_tuple and url_tuple[1][0] == 'external':
url = url_tuple[1][1]
if url is None:
continue
media = media_queries.parse_media_query(tokens[1:])
if media is None:
@ -845,10 +854,6 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
if not media_queries.evaluate_media_query(
media, device_media_type):
continue
url = url_join(
base_url, url, allow_relative=False,
context='@import at %d:%d',
context_args=(rule.source_line, rule.source_column))
if url is not None:
try:
CSS(
@ -910,7 +915,7 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
tinycss2.parse_declaration_list(margin_rule.content)))
if declarations:
selector_list = [(
specificity, '@' + margin_rule.lower_at_keyword,
specificity, f'@{margin_rule.lower_at_keyword}',
page_type)]
page_rules.append(
(margin_rule, selector_list, declarations))

View File

@ -12,8 +12,9 @@ from urllib.parse import unquote
from tinycss2.color3 import parse_color
from .. import text
from ..logger import LOGGER
from ..text.ffi import ffi, pango, units_to_double
from ..text.line_break import Layout, first_line_metrics, line_size
from ..urls import get_link_attribute
from .properties import (
INHERITED, INITIAL_NOT_COMPUTED, INITIAL_VALUES, Dimension)
@ -378,12 +379,12 @@ def length(computer, name, value, font_size=None, pixels_only=False):
elif unit == 'ch':
# TODO: cache
# TODO: use context to use @font-face fonts
layout = text.Layout(
layout = Layout(
context=None, font_size=font_size,
style=computer['computed'])
layout.set_text('0')
line, _ = layout.get_first_line()
logical_width, _ = text.get_size(line, computer['computed'])
logical_width, _ = line_size(line, computer['computed'])
result = value.value * logical_width
elif unit == 'em':
result = value.value * font_size
@ -772,10 +773,10 @@ def strut_layout(style, context=None):
if key in context.strut_layouts:
return context.strut_layouts[key]
layout = text.Layout(context, style['font_size'], style)
layout = Layout(context, style['font_size'], style)
layout.set_text(' ')
line, _ = layout.get_first_line()
_, _, _, _, text_height, baseline = text.first_line_metrics(
_, _, _, _, text_height, baseline = first_line_metrics(
line, '', layout, resume_at=None, space_collapse=False, style=style)
if style['line_height'] == 'normal':
result = text_height, baseline
@ -795,10 +796,15 @@ def ex_ratio(style):
"""Return the ratio 1ex/font_size, according to given style."""
font_size = 1000 # big value
# TODO: use context to use @font-face fonts
layout = text.Layout(context=None, font_size=font_size, style=style)
layout = Layout(context=None, font_size=font_size, style=style)
layout.set_text('x')
line, _ = layout.get_first_line()
_, ink_height_above_baseline = text.get_ink_position(line)
ink_extents = ffi.new('PangoRectangle *')
pango.pango_layout_line_get_extents(line, ink_extents, ffi.NULL)
height_above_baseline = units_to_double(ink_extents.y)
ffi.release(ink_extents)
# Zero means some kind of failure, fallback is 0.5.
# We round to try keeping exact values that were altered by Pango.
return round(-ink_height_above_baseline / font_size, 5) or 0.5
return round(-height_above_baseline / font_size, 5) or 0.5

View File

@ -41,8 +41,8 @@ class CounterStyle(dict):
.. versionadded:: 0.52
Keep a list of counter styles defined by @counter-style rules, indexed by
their names.
Keep a list of counter styles defined by ``@counter-style`` rules, indexed
by their names.
See https://www.w3.org/TR/css-counter-styles-3/.

View File

@ -184,6 +184,9 @@ INITIAL_VALUES = {
'text_decoration_style': 'solid',
# Overflow Module 3 (WD): https://www.w3.org/TR/css-overflow-3/
'block_ellipsis': 'none',
'continue': 'auto',
'max_lines': 'none',
'overflow': 'visible',
'text_overflow': 'clip',
@ -218,6 +221,7 @@ KNOWN_PROPERTIES = set(name.replace('_', '-') for name in INITIAL_VALUES)
# link: click events normally bubble up to link ancestors
# See http://lists.w3.org/Archives/Public/www-style/2012Jun/0315.html
INHERITED = {
'block_ellipsis',
'border_collapse',
'border_spacing',
'caption_side',

View File

@ -376,7 +376,7 @@ def parse_function(function_token):
space-separated arguments. Return ``None`` otherwise.
"""
if not getattr(function_token, 'type', None) == 'function':
if function_token.type != 'function':
return
content = list(remove_whitespace(function_token.arguments))

View File

@ -202,7 +202,7 @@ def font_variant(tokens):
for name, sub_tokens in expand_font_variant(tokens):
try:
values.append(properties.validate_non_shorthand(
None, 'font-variant' + name, sub_tokens, required=True))
None, f'font-variant{name}', sub_tokens, required=True))
except InvalidValues:
return None
return values

View File

@ -16,11 +16,12 @@ from ..utils import (
from .descriptors import expand_font_variant
from .properties import (
background_attachment, background_image, background_position,
background_repeat, background_size, border_style, border_width, box,
column_count, column_width, flex_basis, flex_direction, flex_grow_shrink,
flex_wrap, font_family, font_size, font_stretch, font_style, font_weight,
line_height, list_style_image, list_style_position, list_style_type,
other_colors, overflow_wrap, validate_non_shorthand)
background_repeat, background_size, block_ellipsis, border_style,
border_width, box, column_count, column_width, flex_basis, flex_direction,
flex_grow_shrink, flex_wrap, font_family, font_size, font_stretch,
font_style, font_weight, line_height, list_style_image,
list_style_position, list_style_type, other_colors, overflow_wrap,
validate_non_shorthand)
EXPANDERS = {}
@ -273,7 +274,7 @@ def expand_background(base_url, name, tokens):
def add(name, value):
if value is None:
return False
name = 'background_' + name
name = f'background_{name}'
if name in results:
raise InvalidValues
results[name] = value
@ -607,3 +608,26 @@ def expand_flex_flow(base_url, name, tokens):
raise InvalidValues
else:
raise InvalidValues
@expander('line-clamp')
def expand_line_clamp(base_url, name, tokens):
    """Expand the ``line-clamp`` shorthand property.

    Accepted forms are ``none``, ``<integer>`` and
    ``<integer> <block-ellipsis>``. The shorthand is expanded into the
    ``max_lines``, ``continue`` and ``block_ellipsis`` longhands.

    :raises InvalidValues:
        If the number of tokens is wrong, if the line count is not an
        integer greater than or equal to 1 (same constraint as the
        ``max-lines`` longhand), or if the ellipsis value is invalid.

    """
    if len(tokens) == 1:
        if get_single_keyword(tokens) == 'none':
            yield 'max_lines', 'none'
            yield 'continue', 'auto'
            yield 'block_ellipsis', 'none'
            return
        # A lone line count implies the default ellipsis.
        ellipsis = 'auto'
    elif len(tokens) == 2:
        ellipsis = block_ellipsis([tokens[1]])
    else:
        raise InvalidValues

    count_token = tokens[0]
    max_lines = (
        count_token.int_value if count_token.type == 'number' else None)
    if max_lines is None or max_lines < 1 or ellipsis is None:
        raise InvalidValues

    # Always yield the validated integer, never the raw float value.
    yield 'max_lines', max_lines
    yield 'continue', 'discard'
    yield 'block_ellipsis', ellipsis

View File

@ -326,6 +326,36 @@ def box_decoration_break(keyword):
return keyword in ('slice', 'clone')
@property()
@single_token
def block_ellipsis(token):
    """``block-ellipsis`` property validation.

    Return ``('string', value)`` for a string token, the keyword itself for
    ``none`` or ``auto``, and ``None`` for anything else.

    """
    if token.type == 'string':
        return ('string', token.value)
    keyword = get_keyword(token)
    if keyword in ('none', 'auto'):
        return keyword
@property('continue', unstable=True)
@single_keyword
def continue_(keyword):
    """``continue`` property validation: ``auto`` or ``discard``."""
    allowed_keywords = ('auto', 'discard')
    return keyword in allowed_keywords
@property(unstable=True)
@single_token
def max_lines(token):
    """``max-lines`` property validation.

    Return the integer for an integer token greater than or equal to 1,
    ``'none'`` for the ``none`` keyword, and ``None`` for anything else.

    """
    if token.type == 'number':
        lines = token.int_value
        if lines is not None and lines >= 1:
            return lines
    keyword = get_keyword(token)
    if keyword == 'none':
        return keyword
@property(unstable=True)
@single_keyword
def margin_break(keyword):
@ -518,7 +548,7 @@ def counter(tokens, default_integer):
return # expected a keyword here
counter_name = token.value
if counter_name in ('none', 'initial', 'inherit'):
raise InvalidValues('Invalid counter name: ' + counter_name)
raise InvalidValues(f'Invalid counter name: {counter_name}')
token = next(tokens, None)
if token is not None and (
token.type == 'number' and token.int_value is not None):
@ -1237,10 +1267,9 @@ def anchor(token):
function = parse_function(token)
if function:
name, args = function
prototype = (name, [a.type for a in args])
args = [getattr(a, 'value', a) for a in args]
prototype = (name, [arg.type for arg in args])
if prototype == ('attr', ['ident']):
return ('attr()', args[0])
return ('attr()', args[0].value)
@property(proprietary=True, wants_base_url=True)
@ -1255,10 +1284,9 @@ def link(token, base_url):
function = parse_function(token)
if function:
name, args = function
prototype = (name, [a.type for a in args])
args = [getattr(a, 'value', a) for a in args]
prototype = (name, [arg.type for arg in args])
if prototype == ('attr', ['ident']):
return ('attr()', args[0])
return ('attr()', args[0].value)
@property()
@ -1352,10 +1380,9 @@ def lang(token):
function = parse_function(token)
if function:
name, args = function
prototype = (name, [a.type for a in args])
args = [getattr(a, 'value', a) for a in args]
prototype = (name, [arg.type for arg in args])
if prototype == ('attr', ['ident']):
return ('attr()', args[0])
return ('attr()', args[0].value)
elif token.type == 'string':
return ('string', token.value)

View File

@ -23,7 +23,6 @@ from .css import get_all_computed_styles
from .css.counters import CounterStyle
from .css.targets import TargetCollector
from .draw import draw_page, stacked
from .fonts import FontConfiguration
from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .html import W3C_DATE_RE, get_html_metadata
@ -31,7 +30,8 @@ from .images import get_image_from_uri as original_get_image_from_uri
from .layout import LayoutContext, layout_document
from .layout.percentages import percentage
from .logger import LOGGER, PROGRESS_LOGGER
from .text import ffi, pango
from .text.ffi import ffi, pango
from .text.fonts import FontConfiguration
from .urls import URLFetchingError
@ -68,22 +68,24 @@ def _w3c_date_to_pdf(string, attr_name):
class Font:
def __init__(self, file_content, pango_font):
pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
font_description = pango.pango_font_describe(pango_font)
font_family = ffi.string(pango.pango_font_description_get_family(
font_description))
font_size = pango.pango_font_description_get_size(font_description)
self._font_description = pango.pango_font_describe(pango_font)
self.family = ffi.string(pango.pango_font_description_get_family(
self._font_description))
font_size = pango.pango_font_description_get_size(
self._font_description)
description_string = ffi.string(
pango.pango_font_description_to_string(self._font_description))
sha = hashlib.sha256()
sha.update(file_content)
sha.update(description_string)
self.file_content = file_content
self.file_hash = hash(file_content)
self.hash = ''.join(
chr(65 + letter % 26) for letter in sha.digest()[:6])
self.name = (
b'/' + self.hash.encode('ascii') + b'+' +
font_family.replace(b' ', b''))
self.family = font_family
self.flags = 4
self.italic_angle = 0
self.family.replace(b' ', b''))
self.italic_angle = 0 # TODO: this should be different
self.ascent = int(
pango.pango_font_metrics_get_ascent(pango_metrics) /
font_size * 1000)
@ -96,6 +98,18 @@ class Font:
self.widths = {}
self.cmap = {}
@property
def flags(self):
    """Return the value of the ``/Flags`` entry of the font descriptor.

    Bit positions in the PDF font flags table are numbered from 1 (low-order
    bit), so the flag at position ``n`` has the value ``2 ** (n - 1)``.

    """
    flags = 2 ** (3 - 1)  # Symbolic, custom character set
    if pango.pango_font_description_get_style(self._font_description):
        flags += 2 ** (7 - 1)  # Italic
    if b'Serif' in self.family.split():
        flags += 2 ** (2 - 1)  # Serif
    widths = self.widths.values()
    # Only claim a fixed pitch when several glyphs share one single width.
    if len(widths) > 1 and len(set(widths)) == 1:
        flags += 2 ** (1 - 1)  # FixedPitch
    return flags
class Context(pydyf.Stream):
"""PDF stream object with context storing alpha states."""
@ -635,8 +649,6 @@ class Page:
has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
# In case of duplicate IDs, only the first is an anchor.
has_anchor = anchor_name and anchor_name not in self.anchors
is_attachment = getattr(box, 'is_attachment', False)
download_name = getattr(box, 'attachment_download', None)
if has_bookmark or has_link or has_anchor:
pos_x, pos_y, width, height = box.hit_area()
@ -645,18 +657,18 @@ class Page:
assert token_type == 'url'
link_type, target = link
assert isinstance(target, str)
if link_type == 'external' and is_attachment:
if link_type == 'external' and box.is_attachment:
link_type = 'attachment'
if matrix:
link = (
link_type, target,
rectangle_aabb(matrix, pos_x, pos_y, width, height),
download_name)
box.download_name)
else:
link = (
link_type, target,
(pos_x, pos_y, pos_x + width, pos_y + height),
download_name)
box.download_name)
self.links.append(link)
if matrix and (has_bookmark or has_anchor):
pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
@ -672,20 +684,16 @@ class Page:
def paint(self, context, left_x=0, top_y=0, scale=1, clip=False):
"""Paint the page into the PDF file.
:type context: :class:`pdf.Context`
:type context: ``Context``
:param context:
A context object.
:type left_x: float
:param left_x:
:param float left_x:
X coordinate of the left of the page, in PDF points.
:type top_y: float
:param top_y:
:param float top_y:
Y coordinate of the top of the page, in PDF points.
:type scale: float
:param scale:
:param float scale:
Zoom scale.
:type clip: bool
:param clip:
:param bool clip:
Whether to clip/cut content outside the page. If false or
not provided, content can overflow.
@ -763,7 +771,7 @@ class Document:
can also be instantiated directly with a list of :class:`pages <Page>`, a
set of :class:`metadata <DocumentMetadata>`, a :func:`url_fetcher
<weasyprint.default_url_fetcher>` function, and a :class:`font_config
<weasyprint.fonts.FontConfiguration>`.
<weasyprint.text.fonts.FontConfiguration>`.
"""
@ -869,11 +877,11 @@ class Document:
#: but to the whole document.
self.metadata = metadata
#: A function or other callable with the same signature as
#: :func:`default_url_fetcher` called to fetch external resources such
#: as stylesheets and images. (See :ref:`url-fetchers`.)
#: :func:`weasyprint.default_url_fetcher` called to fetch external
#: resources such as stylesheets and images. (See :ref:`URL Fetchers`.)
self.url_fetcher = url_fetcher
#: A :obj:`dict` of fonts used by the document. Keys are hashes used to
#: identify fonts, values are :class:`Font` objects.
#: identify fonts, values are ``Font`` objects.
self.fonts = {}
# Keep a reference to font_config to avoid its garbage collection until
# rendering is destroyed. This is needed as font_config.__del__ removes
@ -917,22 +925,21 @@ class Document:
def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
"""Paint the pages in a PDF file, with metadata.
:type target: str, pathlib.Path or file object
:type target:
:class:`str`, :class:`pathlib.Path` or :term:`file object`
:param target:
A filename where the PDF file is generated, a file object, or
:obj:`None`.
:type zoom: float
:param zoom:
:param float zoom:
The zoom factor in PDF units per CSS units. **Warning**:
All CSS units are affected, including physical units like
``cm`` and named sizes like ``A4``. For values other than
1, the physical CSS units will thus be "wrong".
:type attachments: list
:param attachments: A list of additional file attachments for the
:param list attachments: A list of additional file attachments for the
generated PDF document or :obj:`None`. The list's elements are
:class:`Attachment` objects, filenames, URLs or file-like objects.
``Attachment`` objects, filenames, URLs or file-like objects.
:param finisher: A finisher function, that accepts the document and a
``pydyf.PDF`` object as parameters, can be passed to perform
:class:`pydyf.PDF` object as parameters, can be passed to perform
post-processing on the PDF right before the trailer is written.
:returns:
The PDF as :obj:`bytes` if ``target`` is not provided or
@ -1162,22 +1169,32 @@ class Document:
pdf.catalog['Names']['EmbeddedFiles'] = content.reference
# Embedded fonts
fonts = pydyf.Dictionary()
pdf_fonts = pydyf.Dictionary()
fonts_by_file_hash = {}
for font in self.fonts.values():
if font.file_hash in fonts_by_file_hash:
fonts_by_file_hash[font.file_hash].append(font)
else:
fonts_by_file_hash[font.file_hash] = [font]
font_references_by_file_hash = {}
for file_hash, fonts in fonts_by_file_hash.items():
# Optimize font
cmap = {}
for font in fonts:
cmap = {**cmap, **font.cmap}
full_font = io.BytesIO(fonts[0].file_content)
optimized_font = io.BytesIO()
try:
full_font = io.BytesIO(font.file_content)
optimized_font = io.BytesIO()
ttfont = TTFont(full_font)
options = subset.Options(
retain_gids=True, passthrough_tables=True)
subsetter = subset.Subsetter(options)
subsetter.populate(gids=font.cmap)
subsetter.populate(gids=cmap)
subsetter.subset(ttfont)
ttfont.save(optimized_font)
content = optimized_font.getvalue()
except TTLibError:
content = font.file_content
content = fonts[0].file_content
# Include font
font_type = 'otf' if content[:4] == b'OTTO' else 'ttf'
@ -1187,7 +1204,9 @@ class Document:
font_extra = pydyf.Dictionary({'Length1': len(content)})
font_stream = pydyf.Stream([content], font_extra, compress=True)
pdf.add_object(font_stream)
font_references_by_file_hash[file_hash] = font_stream.reference
for font in self.fonts.values():
widths = pydyf.Array()
for i in sorted(font.widths):
if i - 1 not in font.widths:
@ -1199,7 +1218,7 @@ class Document:
'Type': '/FontDescriptor',
'FontName': font.name,
'FontFamily': pydyf.String(font.family),
'Flags': 32,
'Flags': font.flags,
'FontBBox': pydyf.Array(font.bbox),
'ItalicAngle': font.italic_angle,
'Ascent': font.ascent,
@ -1208,7 +1227,7 @@ class Document:
'StemV': font.stemv,
'StemH': font.stemh,
(f'FontFile{"3" if font_type == "otf" else "2"}'):
font_stream.reference,
font_references_by_file_hash[font.file_hash],
})
if font_type == 'otf':
font_descriptor['Subtype'] = '/OpenType'
@ -1262,10 +1281,10 @@ class Document:
'ToUnicode': to_unicode.reference,
})
pdf.add_object(font_dictionary)
fonts[font.hash] = font_dictionary.reference
pdf_fonts[font.hash] = font_dictionary.reference
pdf.add_object(fonts)
resources['Font'] = fonts.reference
pdf.add_object(pdf_fonts)
resources['Font'] = pdf_fonts.reference
self._use_references(pdf, resources)
# Anchors

View File

@ -14,7 +14,8 @@ from .formatting_structure import boxes
from .layout import replaced
from .layout.backgrounds import BackgroundLayer
from .stacking import StackingContext
from .text import show_first_line
from .text.ffi import ffi, harfbuzz, pango, units_from_double, units_to_double
from .text.line_break import get_last_word_end
SIDES = ('top', 'right', 'bottom', 'left')
CROP = '''
@ -991,7 +992,8 @@ def draw_replacedbox(context, box):
context, draw_width, draw_height, box.style['image_rendering'])
def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'):
def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip',
block_ellipsis='none'):
if isinstance(box, StackingContext):
stacking_context = box
assert isinstance(
@ -1003,8 +1005,13 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'):
if isinstance(box, (boxes.InlineBox, boxes.LineBox)):
if isinstance(box, boxes.LineBox):
text_overflow = box.text_overflow
block_ellipsis = box.block_ellipsis
in_text = False
for child in box.children:
ellipsis = 'none'
for i, child in enumerate(box.children):
if i == len(box.children) - 1:
# Last child
ellipsis = block_ellipsis
if isinstance(child, StackingContext):
child_offset_x = offset_x
else:
@ -1014,13 +1021,16 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'):
if not in_text:
context.begin_text()
in_text = True
draw_text(context, child, child_offset_x, text_overflow)
draw_text(
context, child, child_offset_x, text_overflow,
ellipsis)
else:
if in_text:
in_text = False
context.end_text()
draw_inline_level(
context, page, child, child_offset_x, text_overflow)
context, page, child, child_offset_x, text_overflow,
ellipsis)
if in_text:
context.end_text()
elif isinstance(box, boxes.InlineReplacedBox):
@ -1033,7 +1043,7 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'):
context.end_text()
def draw_text(context, textbox, offset_x, text_overflow):
def draw_text(context, textbox, offset_x, text_overflow, block_ellipsis):
"""Draw a textbox to a pydyf stream."""
# Pango crashes with font-size: 0
assert textbox.style['font_size']
@ -1046,39 +1056,180 @@ def draw_text(context, textbox, offset_x, text_overflow):
context.set_alpha(textbox.style['color'][3])
textbox.pango_layout.reactivate(textbox.style)
show_first_line(context, textbox, text_overflow, x, y)
draw_first_line(context, textbox, text_overflow, block_ellipsis, x, y)
# Draw text decoration
values = textbox.style['text_decoration_line']
thickness = textbox.style['font_size'] / 18 # Like other browsers do
color = textbox.style['text_decoration_color']
if color == 'currentColor':
color = textbox.style['color']
if ('overline' in values or
'line-through' in values or
'underline' in values):
metrics = textbox.pango_layout.get_font_metrics()
if 'overline' in values:
draw_text_decoration(
context, textbox, offset_x,
textbox.baseline - metrics.ascent + thickness / 2,
thickness, color)
thickness = textbox.pango_layout.underline_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.ascent + thickness / 2)
if 'underline' in values:
draw_text_decoration(
context, textbox, offset_x,
textbox.baseline - metrics.underline_position + thickness / 2,
thickness, color)
thickness = textbox.pango_layout.underline_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.underline_position +
thickness / 2)
if 'line-through' in values:
thickness = textbox.pango_layout.strikethrough_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.strikethrough_position)
if values != 'none':
draw_text_decoration(
context, textbox, offset_x,
textbox.baseline - metrics.strikethrough_position,
thickness, color)
context, textbox, offset_x, offset_y, thickness, color)
textbox.pango_layout.deactivate()
def draw_first_line(context, textbox, text_overflow, block_ellipsis, x, y):
    """Draw the given ``textbox`` line to the document ``context``.

    :param context: Stream receiving the text drawing operators.
    :param textbox: Box whose ``pango_layout`` holds the text to draw.
    :param text_overflow:
        Value of ``text-overflow``; ``'ellipsis'`` lets Pango ellipsize the
        end of the line.
    :param block_ellipsis:
        Value of ``block-ellipsis``: ``'none'``, ``'auto'`` or a
        ``('string', value)`` tuple.
    :param x: Horizontal position of the text origin.
    :param y: Vertical position of the text origin.

    """
    pango.pango_layout_set_single_paragraph_mode(
        textbox.pango_layout.layout, True)

    # Resolve the ellipsis string once, so that it is always bound when the
    # truncation loop below needs it.
    ellipsis = None
    if block_ellipsis != 'none':
        if block_ellipsis == 'auto':
            ellipsis = '\u2026'  # HORIZONTAL ELLIPSIS
        else:
            assert block_ellipsis[0] == 'string'
            ellipsis = block_ellipsis[1]

    if text_overflow == 'ellipsis' or block_ellipsis != 'none':
        assert textbox.pango_layout.max_width is not None
        max_width = textbox.pango_layout.max_width
        pango.pango_layout_set_width(
            textbox.pango_layout.layout, units_from_double(max_width))
        if text_overflow == 'ellipsis':
            pango.pango_layout_set_ellipsize(
                textbox.pango_layout.layout, pango.PANGO_ELLIPSIZE_END)
        else:
            # Remove last word if hyphenated
            new_text = textbox.pango_layout.text
            hyphen = textbox.style['hyphenate_character']
            # An empty hyphenate character would match any text and make
            # the ``[:-0]`` slice below empty, hence the explicit guard.
            if hyphen and new_text.endswith(hyphen):
                last_word_end = get_last_word_end(
                    new_text[:-len(hyphen)], textbox.style['lang'])
                if last_word_end:
                    new_text = new_text[:last_word_end]

            textbox.pango_layout.set_text(new_text + ellipsis)

    first_line, second_line = textbox.pango_layout.get_first_line()

    if block_ellipsis != 'none':
        # Drop trailing words until text and ellipsis fit on one line.
        while second_line:
            text = textbox.pango_layout.text
            # ``len(text) - len(ellipsis)`` stays correct for an empty
            # ellipsis, where ``text[:-0]`` would give an empty string.
            last_word_end = get_last_word_end(
                text[:len(text) - len(ellipsis)], textbox.style['lang'])
            if last_word_end is None:
                break
            new_text = text[:last_word_end]
            textbox.pango_layout.set_text(new_text + ellipsis)
            first_line, second_line = textbox.pango_layout.get_first_line()

    font_size = textbox.style['font_size']
    utf8_text = textbox.pango_layout.text.encode('utf-8')
    previous_utf8_position = 0

    # Collect the runs of the first line by following the linked list.
    runs = [first_line.runs[0]]
    while runs[-1].next != ffi.NULL:
        runs.append(runs[-1].next)

    context.text_matrix(font_size, 0, 0, -font_size, x, y)
    last_font = None
    string = ''
    for run in runs:
        # Pango objects
        glyph_item = ffi.cast('PangoGlyphItem *', run.data)
        glyph_string = glyph_item.glyphs
        glyphs = glyph_string.glyphs
        num_glyphs = glyph_string.num_glyphs
        offset = glyph_item.item.offset
        clusters = glyph_string.log_clusters

        # Font content
        pango_font = glyph_item.item.analysis.font
        pango_desc = pango.pango_font_describe(pango_font)
        font_hash = ffi.string(
            pango.pango_font_description_to_string(pango_desc))
        fonts = context.get_fonts()
        if font_hash in fonts:
            font = fonts[font_hash]
        else:
            hb_font = pango.pango_font_get_hb_font(pango_font)
            hb_face = harfbuzz.hb_font_get_face(hb_font)
            hb_blob = harfbuzz.hb_face_reference_blob(hb_face)
            hb_data = harfbuzz.hb_blob_get_data(hb_blob, context.length)
            file_content = ffi.unpack(hb_data, int(context.length[0]))
            font = context.add_font(font_hash, file_content, pango_font)

        # Positions of the glyphs in the UTF-8 string
        utf8_positions = [offset + clusters[i] for i in range(1, num_glyphs)]
        utf8_positions.append(offset + glyph_item.item.length)

        # Go through the run glyphs
        if font != last_font:
            if string:
                context.show_text(string)
            string = ''
            last_font = font
        context.set_font_size(font.hash, 1)
        string += '<'
        for i in range(num_glyphs):
            glyph = glyphs[i].glyph
            width = glyphs[i].geometry.width
            utf8_position = utf8_positions[i]

            x_offset = glyphs[i].geometry.x_offset / font_size
            if x_offset:
                string += f'>{-x_offset}<'
            string += f'{glyph:04x}'

            # Ink bounding box and logical widths in font
            if glyph not in font.widths:
                pango.pango_font_get_glyph_extents(
                    pango_font, glyph, context.ink_rect, context.logical_rect)
                x1, y1, x2, y2 = (
                    context.ink_rect.x,
                    -context.ink_rect.y - context.ink_rect.height,
                    context.ink_rect.x + context.ink_rect.width,
                    -context.ink_rect.y)
                if x1 < font.bbox[0]:
                    font.bbox[0] = int(units_to_double(x1 * 1000) / font_size)
                if y1 < font.bbox[1]:
                    font.bbox[1] = int(units_to_double(y1 * 1000) / font_size)
                if x2 > font.bbox[2]:
                    font.bbox[2] = int(units_to_double(x2 * 1000) / font_size)
                if y2 > font.bbox[3]:
                    font.bbox[3] = int(units_to_double(y2 * 1000) / font_size)
                font.widths[glyph] = int(
                    units_to_double(context.logical_rect.width * 1000) /
                    font_size)

            # Kerning, word spacing, letter spacing
            kerning = int(
                font.widths[glyph] -
                units_to_double(width * 1000) / font_size +
                x_offset)
            if kerning:
                string += f'>{kerning}<'

            # Mapping between glyphs and characters
            if glyph not in font.cmap and glyph != pango.PANGO_GLYPH_EMPTY:
                utf8_slice = slice(previous_utf8_position, utf8_position)
                font.cmap[glyph] = utf8_text[utf8_slice].decode('utf-8')
            previous_utf8_position = utf8_position

        # Close the last glyphs list, remove if empty
        if string[-1] == '<':
            string = string[:-1]
        else:
            string += '>'

    # Draw text
    context.show_text(string)
def draw_wave(context, x, y, width, offset_x, radius):
up = 1
max_x = x + width

View File

@ -76,11 +76,13 @@ class Box:
is_for_root_element = False
is_column = False
is_leader = False
is_attachment = False
# Other properties
transformation_matrix = None
bookmark_label = None
string_set = None
download_name = None
# Default, overridden on some subclasses
def all_children(self):
@ -338,7 +340,7 @@ class ParentBox(Box):
"""A flat generator for a box, its children and descendants."""
yield self
for child in self.children:
if hasattr(child, 'descendants'):
if isinstance(child, ParentBox):
for grand_child in child.descendants():
yield grand_child
else:
@ -410,6 +412,7 @@ class LineBox(ParentBox):
"""
text_overflow = 'clip'
block_ellipsis = 'none'
@classmethod
def anonymous_from(cls, parent, *args, **kwargs):

View File

@ -49,7 +49,7 @@ def ascii_lower(string):
This is used for `ASCII case-insensitive
<http://whatwg.org/C#ascii-case-insensitive>`_ matching.
This is different from the :meth:`~py:str.lower` method of Unicode strings
This is different from the :meth:`str.lower` method of Unicode strings
which also affect non-ASCII characters,
sometimes mapping them into the ASCII range:
@ -249,7 +249,7 @@ def handle_td(element, box, _get_image_from_uri, _base_url):
def handle_a(element, box, _get_image_from_uri, base_url):
"""Handle the ``rel`` attribute."""
box.is_attachment = element_has_link_type(element, 'attachment')
box.attachment_download = element.get('download')
box.download_name = element.get('download')
return [box]

View File

@ -206,7 +206,7 @@ def absolute_block(context, box, containing_block, fixed_boxes):
new_box, _, _, _, _ = block_container_layout(
context, box, max_position_y=float('inf'), skip_stack=None,
page_is_empty=False, absolute_boxes=absolute_boxes,
fixed_boxes=fixed_boxes, adjoining_margins=None)
fixed_boxes=fixed_boxes, adjoining_margins=None, discard=False)
for child_placeholder in absolute_boxes:
absolute_layout(context, child_placeholder, new_box, fixed_boxes)

View File

@ -21,7 +21,7 @@ from .tables import table_layout, table_wrapper_width
def block_level_layout(context, box, max_position_y, skip_stack,
containing_block, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins):
fixed_boxes, adjoining_margins, discard):
"""Lay out the block-level ``box``.
:param max_position_y: the absolute vertical position (as in
@ -57,12 +57,12 @@ def block_level_layout(context, box, max_position_y, skip_stack,
return block_level_layout_switch(
context, box, max_position_y, skip_stack, containing_block,
page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins)
page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins, discard)
def block_level_layout_switch(context, box, max_position_y, skip_stack,
containing_block, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins):
fixed_boxes, adjoining_margins, discard):
"""Call the layout function corresponding to the ``box`` type."""
if isinstance(box, boxes.TableBox):
return table_layout(
@ -71,7 +71,8 @@ def block_level_layout_switch(context, box, max_position_y, skip_stack,
elif isinstance(box, boxes.BlockBox):
return block_box_layout(
context, box, max_position_y, skip_stack, containing_block,
page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins)
page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins,
discard)
elif isinstance(box, boxes.BlockReplacedBox):
box = block_replaced_box_layout(box, containing_block)
# Don't collide with floats
@ -93,7 +94,7 @@ def block_level_layout_switch(context, box, max_position_y, skip_stack,
def block_box_layout(context, box, max_position_y, skip_stack,
containing_block, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins):
fixed_boxes, adjoining_margins, discard):
"""Lay out the block ``box``."""
if (box.style['column_width'] != 'auto' or
box.style['column_count'] != 'auto'):
@ -124,7 +125,7 @@ def block_box_layout(context, box, max_position_y, skip_stack,
new_box, resume_at, next_page, adjoining_margins, collapsing_through = \
block_container_layout(
context, box, max_position_y, skip_stack, page_is_empty,
absolute_boxes, fixed_boxes, adjoining_margins)
absolute_boxes, fixed_boxes, adjoining_margins, discard)
if new_box and new_box.is_table_wrapper:
# Don't collide with floats
# http://www.w3.org/TR/CSS21/visuren.html#floats
@ -256,7 +257,7 @@ def relative_positioning(box, containing_block):
def block_container_layout(context, box, max_position_y, skip_stack,
page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=None):
adjoining_margins, discard):
"""Set the ``box`` height."""
# TODO: boxes.FlexBox is allowed here because flex_layout calls
# block_container_layout, there's probably a better solution.
@ -273,10 +274,14 @@ def block_container_layout(context, box, max_position_y, skip_stack,
is_start = skip_stack is None
box.remove_decoration(start=not is_start, end=False)
discard |= box.style['continue'] == 'discard'
draw_bottom_decoration = (
discard or box.style['box_decoration_break'] == 'clone')
if adjoining_margins is None:
adjoining_margins = []
if box.style['box_decoration_break'] == 'clone':
if draw_bottom_decoration:
max_position_y -= (
box.padding_bottom + box.border_bottom_width +
box.margin_bottom)
@ -371,14 +376,14 @@ def block_container_layout(context, box, max_position_y, skip_stack,
new_containing_block, absolute_boxes, fixed_boxes,
first_letter_style)
is_page_break = False
for line, resume_at in lines_iterator:
for i, (line, resume_at) in enumerate(lines_iterator):
line.resume_at = resume_at
new_position_y = line.position_y + line.height
# Add bottom padding and border to the bottom position of the
# box if needed
if resume_at is None or (
box.style['box_decoration_break'] == 'clone'):
draw_bottom_decoration |= resume_at is None
if draw_bottom_decoration:
offset_y = box.border_bottom_width + box.padding_bottom
else:
offset_y = 0
@ -431,6 +436,13 @@ def block_container_layout(context, box, max_position_y, skip_stack,
new_children.append(line)
position_y = new_position_y
skip_stack = resume_at
# Break box if we reached max-lines
if box.style['max_lines'] != 'none':
if i >= box.style['max_lines'] - 1:
line.block_ellipsis = box.style['block_ellipsis']
break
if new_children:
resume_at = (index, new_children[-1].resume_at)
if is_page_break:
@ -505,7 +517,7 @@ def block_container_layout(context, box, max_position_y, skip_stack,
collapsing_through) = block_level_layout(
context, child, max_position_y, skip_stack,
new_containing_block, page_is_empty_with_no_children,
absolute_boxes, fixed_boxes, adjoining_margins)
absolute_boxes, fixed_boxes, adjoining_margins, discard)
skip_stack = None
if new_child is not None:
@ -595,7 +607,11 @@ def block_container_layout(context, box, max_position_y, skip_stack,
else:
resume_at = None
if (resume_at is not None and
box_is_fragmented = resume_at is not None
if box.style['continue'] == 'discard':
resume_at = None
if (box_is_fragmented and
box.style['break_inside'] in ('avoid', 'avoid-page') and
not page_is_empty):
return (
@ -638,8 +654,15 @@ def block_container_layout(context, box, max_position_y, skip_stack,
position_y += collapse_margin(adjoining_margins)
adjoining_margins = []
# Add block ellipsis
if box_is_fragmented and new_children:
last_child = new_children[-1]
if isinstance(last_child, boxes.LineBox):
last_child.block_ellipsis = box.style['block_ellipsis']
new_box = box.copy_with_children(new_children)
new_box.remove_decoration(start=not is_start, end=resume_at is not None)
new_box.remove_decoration(
start=not is_start, end=box_is_fragmented and not discard)
# TODO: See corner cases in
# http://www.w3.org/TR/CSS21/visudet.html#normal-block
@ -663,19 +686,19 @@ def block_container_layout(context, box, max_position_y, skip_stack,
if not isinstance(new_box, boxes.BlockBox):
context.finish_block_formatting_context(new_box)
if resume_at is None:
if discard or not box_is_fragmented:
# After finish_block_formatting_context which may increment
# new_box.height
new_box.height = max(
min(new_box.height, new_box.max_height),
new_box.min_height)
else:
elif max_position_y < float('inf'):
# Make the box fill the blank space at the bottom of the page
# https://www.w3.org/TR/css-break-3/#box-splitting
new_box.height = (
max_position_y - new_box.position_y -
(new_box.margin_height() - new_box.height))
if box.style['box_decoration_break'] == 'clone':
if draw_bottom_decoration:
new_box.height += (
box.padding_bottom + box.border_bottom_width +
box.margin_bottom)

View File

@ -124,7 +124,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block,
new_child, _, _, adjoining_margins, _ = block_level_layout(
context, block, original_max_position_y, skip_stack,
containing_block, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins)
adjoining_margins, discard=False)
new_children.append(new_child)
current_position_y = (
new_child.border_height() + new_child.border_box_y())
@ -142,7 +142,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block,
column_box = create_column_box(column_children)
new_child, _, _, _, _ = block_box_layout(
context, column_box, float('inf'), skip_stack, containing_block,
page_is_empty, [], [], [])
page_is_empty, [], [], [], discard=False)
height = new_child.margin_height()
if style['column_fill'] == 'balance':
height /= count
@ -163,7 +163,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block,
new_box, resume_at, next_page, _, _ = block_box_layout(
context, column_box, box.content_box_y() + height,
column_skip_stack, containing_block, page_is_empty,
[], [], [])
[], [], [], discard=False)
if new_box is None:
# We didn't render anything. Give up and use the max
# content height.
@ -184,7 +184,8 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block,
# Get the minimum size needed to render the next box
next_box, _, _, _, _ = block_box_layout(
context, column_box, box.content_box_y(),
column_skip_stack, containing_block, True, [], [], [])
column_skip_stack, containing_block, True, [], [], [],
discard=False)
for child in next_box.children:
if child.is_in_normal_flow():
next_box_size = child.margin_height()
@ -246,7 +247,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block,
block_box_layout(
context, column_box, max_position_y, skip_stack,
containing_block, page_is_empty, absolute_boxes,
fixed_boxes, None))
fixed_boxes, None, discard=False))
if new_child is None:
break
next_page = column_next_page

View File

@ -52,7 +52,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
else:
main_space = max_position_y - box.position_y
if containing_block.height != 'auto':
if hasattr(containing_block.height, 'unit'):
if isinstance(containing_block.height, Dimension):
assert containing_block.height.unit == 'px'
main_space = min(main_space, containing_block.height.value)
else:
@ -69,7 +69,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
if cross == 'height':
main_space = max_position_y - box.content_box_y()
if containing_block.height != 'auto':
if hasattr(containing_block.height, 'unit'):
if isinstance(containing_block.height, Dimension):
assert containing_block.height.unit == 'px'
main_space = min(main_space, containing_block.height.value)
else:
@ -154,7 +154,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child.style['max_height'] = Dimension(float('inf'), 'px')
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
parent_box, page_is_empty, [], [], [])[0]
parent_box, page_is_empty, [], [], [], False)[0]
content_size = new_child.height
child.min_height = min(specified_size, content_size)
@ -214,7 +214,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
parent_box, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=[])[0]
adjoining_margins=[], discard=False)[0]
child.flex_base_size = new_child.margin_height()
elif child.style[axis] == 'min-content':
child.style[axis] = 'auto'
@ -229,7 +229,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
parent_box, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=[])[0]
adjoining_margins=[], discard=False)[0]
child.flex_base_size = new_child.margin_height()
else:
assert child.style[axis].unit == 'px'
@ -463,7 +463,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
blocks.block_level_layout_switch(
context, child_copy, float('inf'), child_skip_stack,
parent_box, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=[]))
adjoining_margins=[], discard=False))
child._baseline = find_in_flow_baseline(new_child) or 0
if cross == 'height':
@ -842,7 +842,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child, child_resume_at = blocks.block_level_layout_switch(
context, child, max_position_y, child_skip_stack, box,
page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=[])[:2]
adjoining_margins=[], discard=False)[:2]
if new_child is None:
if resume_at and resume_at[0]:
resume_at = (resume_at[0] + i - 1, None)

View File

@ -66,7 +66,7 @@ def float_layout(context, box, containing_block, absolute_boxes, fixed_boxes):
context, box, max_position_y=float('inf'),
skip_stack=None, page_is_empty=False,
absolute_boxes=absolute_boxes, fixed_boxes=fixed_boxes,
adjoining_margins=None)
adjoining_margins=None, discard=False)
context.finish_block_formatting_context(box)
elif isinstance(box, boxes.FlexContainerBox):
box, _, _, _, _ = flex_layout(

View File

@ -11,7 +11,7 @@ import unicodedata
from ..css import computed_from_cascaded
from ..css.computed_values import ex_ratio, strut_layout
from ..formatting_structure import boxes
from ..text import can_break_text, create_layout, split_first_line
from ..text.line_break import can_break_text, create_layout, split_first_line
from .absolute import AbsolutePlaceholder, absolute_layout
from .flex import flex_layout
from .float import avoid_collisions, float_layout
@ -498,8 +498,8 @@ def replaced_box_height(box):
def inline_replaced_box_layout(box, containing_block):
"""Lay out an inline :class:`boxes.ReplacedBox` ``box``."""
for side in ['top', 'right', 'bottom', 'left']:
if getattr(box, 'margin_' + side) == 'auto':
setattr(box, 'margin_' + side, 0)
if getattr(box, f'margin_{side}') == 'auto':
setattr(box, f'margin_{side}', 0)
inline_replaced_box_width_height(box, containing_block)
@ -616,7 +616,7 @@ def inline_block_box_layout(context, box, position_x, skip_stack,
box, _, _, _, _ = block_container_layout(
context, box, max_position_y=float('inf'), skip_stack=skip_stack,
page_is_empty=True, absolute_boxes=absolute_boxes,
fixed_boxes=fixed_boxes)
fixed_boxes=fixed_boxes, adjoining_margins=None, discard=False)
box.baseline = inline_block_baseline(box)
return box
@ -719,8 +719,8 @@ def split_inline_level(context, box, position_x, max_x, skip_stack,
box.position_x = position_x
box.position_y = 0
for side in ['top', 'right', 'bottom', 'left']:
if getattr(box, 'margin_' + side) == 'auto':
setattr(box, 'margin_' + side, 0)
if getattr(box, f'margin_{side}') == 'auto':
setattr(box, f'margin_{side}', 0)
new_box, resume_at, _, _, _ = flex_layout(
context, box, float('inf'), skip_stack, containing_block,
False, absolute_boxes, fixed_boxes)

View File

@ -437,7 +437,8 @@ def margin_box_content_layout(context, page, box):
box, resume_at, next_page, _, _ = block_container_layout(
context, box,
max_position_y=float('inf'), skip_stack=None,
page_is_empty=True, absolute_boxes=[], fixed_boxes=[])
page_is_empty=True, absolute_boxes=[], fixed_boxes=[],
adjoining_margins=None, discard=False)
assert resume_at is None
vertical_align = box.style['vertical_align']
@ -548,7 +549,7 @@ def make_page(context, root_box, page_type, resume_at, page_number,
root_box, resume_at, next_page, _, _ = block_level_layout(
context, root_box, page_content_bottom, resume_at,
initial_containing_block, page_is_empty, positioned_boxes,
positioned_boxes, adjoining_margins)
positioned_boxes, adjoining_margins, discard=False)
assert root_box
page.fixed_boxes = [

View File

@ -12,8 +12,8 @@
import sys
from .. import text
from ..formatting_structure import boxes
from ..text.line_break import split_first_line
from .replaced import default_image_sizing
@ -294,7 +294,7 @@ def inline_line_widths(context, box, outer, is_line_start, minimum,
while new_resume_at is not None:
resume_at += new_resume_at
_, _, new_resume_at, width, _, _ = (
text.split_first_line(
split_first_line(
child_text[resume_at:], child.style, context,
max_width, child.justification_spacing,
minimum=True))

View File

@ -142,12 +142,11 @@ def table_layout(context, table, max_position_y, skip_stack, containing_block,
cell.computed_height = cell.height
cell.height = 'auto'
cell, _, _, _, _ = block_container_layout(
context, cell,
max_position_y=float('inf'),
skip_stack=None,
page_is_empty=False,
context, cell, max_position_y=float('inf'),
skip_stack=None, page_is_empty=False,
absolute_boxes=absolute_boxes,
fixed_boxes=fixed_boxes)
fixed_boxes=fixed_boxes, adjoining_margins=None,
discard=False)
cell.empty = not any(
child.is_floated() or child.is_in_normal_flow()
for child in cell.children)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,381 @@
"""
weasyprint.text.constants
-------------------------
Constants used for text layout.
"""
from .ffi import pango
# Pango features
# Each table maps a keyword to the constant of the same name exposed by the
# ``pango`` library object; the constant is looked up from the keyword.
PANGO_STYLE = {
    keyword: getattr(pango, f'PANGO_STYLE_{keyword.upper()}')
    for keyword in ('normal', 'oblique', 'italic')
}

PANGO_STRETCH = {
    keyword: getattr(
        pango, 'PANGO_STRETCH_' + keyword.upper().replace('-', '_'))
    for keyword in (
        'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed',
        'normal', 'semi-expanded', 'expanded', 'extra-expanded',
        'ultra-expanded')
}

# Keys already are the constant names without their ``PANGO_`` prefix
PANGO_WRAP_MODE = {
    mode: getattr(pango, f'PANGO_{mode}')
    for mode in ('WRAP_WORD', 'WRAP_CHAR', 'WRAP_WORD_CHAR')
}
# Language system tags
# From https://docs.microsoft.com/typography/opentype/spec/languagetags
# Keys are lowercase language system tags, values are the language codes they
# stand for (presumably ISO 639 codes, as the name suggests -- TODO confirm).
# The mapping is not one-to-one: several tags share the same code (for
# example 'zhh', 'zhp', 'zhs' and 'zht' all give 'zho') and a few tags map to
# themselves (for example 'chy'), so it cannot be reversed safely.
LST_TO_ISO = {
'aba': 'abq',
'afk': 'afr',
'afr': 'aar',
'agw': 'ahg',
'als': 'gsw',
'alt': 'atv',
'ari': 'aiw',
'ark': 'mhv',
'ath': 'apk',
'avr': 'ava',
'bad': 'bfq',
'bad0': 'bad',
'bag': 'bfy',
'bal': 'krc',
'bau': 'bci',
'bch': 'bcq',
'bgr': 'bul',
'bil': 'byn',
'bkf': 'bla',
'bli': 'bal',
'bln': 'bjt',
'blt': 'bft',
'bmb': 'bam',
'bri': 'bra',
'brm': 'mya',
'bsh': 'bak',
'bti': 'btb',
'chg': 'sgw',
'chh': 'hne',
'chi': 'nya',
'chk': 'ckt',
'chk0': 'chk',
'chu': 'chv',
'chy': 'chy',
'cmr': 'swb',
'crr': 'crx',
'crt': 'crh',
'csl': 'chu',
'csy': 'ces',
'dcr': 'cwd',
'dgr': 'doi',
'djr': 'dje',
'djr0': 'djr',
'dng': 'ada',
'dnk': 'din',
'dri': 'prs',
'dun': 'dng',
'dzn': 'dzo',
'ebi': 'igb',
'ecr': 'crj',
'edo': 'bin',
'erz': 'myv',
'esp': 'spa',
'eti': 'est',
'euq': 'eus',
'evk': 'evn',
'evn': 'eve',
'fan': 'acf',
'fan0': 'fan',
'far': 'fas',
'fji': 'fij',
'fle': 'vls',
'fne': 'enf',
'fos': 'fao',
'fri': 'fry',
'frl': 'fur',
'frp': 'frp',
'fta': 'fuf',
'gad': 'gaa',
'gae': 'gla',
'gal': 'glg',
'gaw': 'gbm',
'gil': 'niv',
'gil0': 'gil',
'gmz': 'guk',
'grn': 'kal',
'gro': 'grt',
'gua': 'grn',
'hai': 'hat',
'hal': 'flm',
'har': 'hoj',
'hbn': 'amf',
'hma': 'mrj',
'hnd': 'hno',
'ho': 'hoc',
'hri': 'har',
'hye0': 'hye',
'ijo': 'ijc',
'ing': 'inh',
'inu': 'iku',
'iri': 'gle',
'irt': 'gle',
'ism': 'smn',
'iwr': 'heb',
'jan': 'jpn',
'jii': 'yid',
'jud': 'lad',
'jul': 'dyu',
'kab': 'kbd',
'kab0': 'kab',
'kac': 'kfr',
'kal': 'kln',
'kar': 'krc',
'keb': 'ktb',
'kge': 'kat',
'kha': 'kjh',
'khk': 'kca',
'khs': 'kca',
'khv': 'kca',
'kis': 'kqs',
'kkn': 'kex',
'klm': 'xal',
'kmb': 'kam',
'kmn': 'kfy',
'kmo': 'kmw',
'kms': 'kxc',
'knr': 'kau',
'kod': 'kfa',
'koh': 'okm',
'kon': 'ktu',
'kon0': 'kon',
'kop': 'koi',
'koz': 'kpv',
'kpl': 'kpe',
'krk': 'kaa',
'krm': 'kdr',
'krn': 'kar',
'krt': 'kqy',
'ksh': 'kas',
'ksh0': 'ksh',
'ksi': 'kha',
'ksm': 'sjd',
'kui': 'kxu',
'kul': 'kfx',
'kuu': 'kru',
'kuy': 'kdt',
'kyk': 'kpy',
'lad': 'lld',
'lah': 'bfu',
'lak': 'lbe',
'lam': 'lmn',
'laz': 'lzz',
'lcr': 'crm',
'ldk': 'lbj',
'lma': 'mhr',
'lmb': 'lif',
'lmw': 'ngl',
'lsb': 'dsb',
'lsm': 'smj',
'lth': 'lit',
'luh': 'luy',
'lvi': 'lav',
'maj': 'mpe',
'mak': 'vmw',
'man': 'mns',
'map': 'arn',
'maw': 'mwr',
'mbn': 'kmb',
'mch': 'mnc',
'mcr': 'crm',
'mde': 'men',
'men': 'mym',
'miz': 'lus',
'mkr': 'mak',
'mle': 'mdy',
'mln': 'mlq',
'mlr': 'mal',
'mly': 'msa',
'mnd': 'mnk',
'mng': 'mon',
'mnk': 'man',
'mnx': 'glv',
'mok': 'mdf',
'mon': 'mnw',
'mth': 'mai',
'mts': 'mlt',
'mun': 'unr',
'nan': 'gld',
'nas': 'nsk',
'ncr': 'csw',
'ndg': 'ndo',
'nhc': 'csw',
'nis': 'dap',
'nkl': 'nyn',
'nko': 'nqo',
'nor': 'nob',
'nsm': 'sme',
'nta': 'nod',
'nto': 'epo',
'nyn': 'nno',
'ocr': 'ojs',
'ojb': 'oji',
'oro': 'orm',
'paa': 'sam',
'pal': 'pli',
'pap': 'plp',
'pap0': 'pap',
'pas': 'pus',
'pgr': 'ell',
'pil': 'fil',
'plg': 'pce',
'plk': 'pol',
'ptg': 'por',
'qin': 'bgr',
'rbu': 'bxr',
'rcr': 'atj',
'rms': 'roh',
'rom': 'ron',
'roy': 'rom',
'rsy': 'rue',
'rua': 'kin',
'sad': 'sck',
'say': 'chp',
'sek': 'xan',
'sel': 'sel',
'sgo': 'sag',
'sgs': 'sgs',
'sib': 'sjo',
'sig': 'xst',
'sks': 'sms',
'sky': 'slk',
'sla': 'scs',
'sml': 'som',
'sna': 'seh',
'sna0': 'sna',
'snh': 'sin',
'sog': 'gru',
'srb': 'srp',
'ssl': 'xsl',
'ssm': 'sma',
'sur': 'suq',
'sve': 'swe',
'swa': 'aii',
'swk': 'swa',
'swz': 'ssw',
'sxt': 'ngo',
'taj': 'tgk',
'tcr': 'cwd',
'tgn': 'ton',
'tgr': 'tig',
'tgy': 'tir',
'tht': 'tah',
'tib': 'bod',
'tkm': 'tuk',
'tmn': 'tem',
'tna': 'tsn',
'tne': 'enh',
'tng': 'toi',
'tod': 'xal',
'tod0': 'tod',
'trk': 'tur',
'tsg': 'tso',
'tua': 'tru',
'tul': 'tcy',
'tuv': 'tyv',
'twi': 'aka',
'usb': 'hsb',
'uyg': 'uig',
'vit': 'vie',
'vro': 'vro',
'wa': 'wbm',
'wag': 'wbr',
'wcr': 'crk',
'wel': 'cym',
'wlf': 'wol',
'xbd': 'khb',
'xhs': 'xho',
'yak': 'sah',
'yba': 'yor',
'ycr': 'cre',
'yim': 'iii',
'zhh': 'zho',
'zhp': 'zho',
'zhs': 'zho',
'zht': 'zho',
'znd': 'zne',
}
# Font features
# Each table maps a keyword to the four-letter feature tag(s) it stands for.
# Ligature and caps keywords give a list of tags, numeric and East Asian
# keywords give a single tag.
LIGATURE_KEYS = dict((
    ('common-ligatures', ['liga', 'clig']),
    ('historical-ligatures', ['hlig']),
    ('discretionary-ligatures', ['dlig']),
    ('contextual', ['calt']),
))

CAPS_KEYS = dict((
    ('small-caps', ['smcp']),
    ('all-small-caps', ['c2sc', 'smcp']),
    ('petite-caps', ['pcap']),
    ('all-petite-caps', ['c2pc', 'pcap']),
    ('unicase', ['unic']),
    ('titling-caps', ['titl']),
))

NUMERIC_KEYS = dict((
    ('lining-nums', 'lnum'),
    ('oldstyle-nums', 'onum'),
    ('proportional-nums', 'pnum'),
    ('tabular-nums', 'tnum'),
    ('diagonal-fractions', 'frac'),
    ('stacked-fractions', 'afrc'),
    ('ordinal', 'ordn'),
    ('slashed-zero', 'zero'),
))

EAST_ASIAN_KEYS = dict((
    ('jis78', 'jp78'),
    ('jis83', 'jp83'),
    ('jis90', 'jp90'),
    ('jis04', 'jp04'),
    ('simplified', 'smpl'),
    ('traditional', 'trad'),
    ('full-width', 'fwid'),
    ('proportional-width', 'pwid'),
    ('ruby', 'ruby'),
))
# Fontconfig features
# Weights: the two keywords first, then the numeric weights 100 to 900 paired
# in order with their names.
FONTCONFIG_WEIGHT = {'normal': 'normal', 'bold': 'bold'}
FONTCONFIG_WEIGHT.update(zip(
    range(100, 1000, 100),
    ('thin', 'extralight', 'light', 'normal', 'medium', 'demibold', 'bold',
     'extrabold', 'black')))

# Styles: only 'normal' has a different name ('roman')
FONTCONFIG_STYLE = {
    keyword: ('roman' if keyword == 'normal' else keyword)
    for keyword in ('normal', 'italic', 'oblique')
}

# Stretches: the value is the keyword with its hyphen removed
FONTCONFIG_STRETCH = {
    keyword: keyword.replace('-', '')
    for keyword in (
        'normal', 'ultra-condensed', 'extra-condensed', 'condensed',
        'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded',
        'ultra-expanded')
}

407
weasyprint/text/ffi.py Normal file
View File

@ -0,0 +1,407 @@
"""
weasyprint.text.ffi
-------------------
Imports of dynamic libraries used for text layout.
"""
import cffi
# Declarations of the C types and functions used through cffi, grouped by
# library (HarfBuzz, Pango, FontConfig, PangoFT2). Opaque types are declared
# with ``typedef ... name;``. Struct layouts have to match the C headers:
# - PangoAnalysis stores level, gravity, flags and script as guint8 (one byte
#   each); declaring them as guint shifted the offsets of the ``language``
#   and ``extra_attrs`` fields that follow them,
# - FcPatternDestroy returns nothing, so it is declared as ``void``.
ffi = cffi.FFI()
ffi.cdef('''
// HarfBuzz
typedef ... hb_font_t;
typedef ... hb_face_t;
typedef ... hb_blob_t;
hb_face_t * hb_font_get_face (hb_font_t *font);
hb_blob_t * hb_face_reference_blob (hb_face_t *face);
const char * hb_blob_get_data (hb_blob_t *blob, unsigned int *length);
// Pango
typedef unsigned int guint;
typedef unsigned char guint8;
typedef int gint;
typedef char gchar;
typedef gint gboolean;
typedef void* gpointer;
typedef ... PangoLayout;
typedef ... PangoContext;
typedef ... PangoFontMap;
typedef ... PangoFontMetrics;
typedef ... PangoLanguage;
typedef ... PangoTabArray;
typedef ... PangoFontDescription;
typedef ... PangoLayoutIter;
typedef ... PangoAttrList;
typedef ... PangoAttrClass;
typedef ... PangoFont;
typedef guint PangoGlyph;
typedef gint PangoGlyphUnit;
const guint PANGO_GLYPH_EMPTY = 0x0FFFFFFF;
typedef enum {
PANGO_STYLE_NORMAL,
PANGO_STYLE_OBLIQUE,
PANGO_STYLE_ITALIC
} PangoStyle;
typedef enum {
PANGO_WEIGHT_THIN = 100,
PANGO_WEIGHT_ULTRALIGHT = 200,
PANGO_WEIGHT_LIGHT = 300,
PANGO_WEIGHT_BOOK = 380,
PANGO_WEIGHT_NORMAL = 400,
PANGO_WEIGHT_MEDIUM = 500,
PANGO_WEIGHT_SEMIBOLD = 600,
PANGO_WEIGHT_BOLD = 700,
PANGO_WEIGHT_ULTRABOLD = 800,
PANGO_WEIGHT_HEAVY = 900,
PANGO_WEIGHT_ULTRAHEAVY = 1000
} PangoWeight;
typedef enum {
PANGO_STRETCH_ULTRA_CONDENSED,
PANGO_STRETCH_EXTRA_CONDENSED,
PANGO_STRETCH_CONDENSED,
PANGO_STRETCH_SEMI_CONDENSED,
PANGO_STRETCH_NORMAL,
PANGO_STRETCH_SEMI_EXPANDED,
PANGO_STRETCH_EXPANDED,
PANGO_STRETCH_EXTRA_EXPANDED,
PANGO_STRETCH_ULTRA_EXPANDED
} PangoStretch;
typedef enum {
PANGO_WRAP_WORD,
PANGO_WRAP_CHAR,
PANGO_WRAP_WORD_CHAR
} PangoWrapMode;
typedef enum {
PANGO_TAB_LEFT
} PangoTabAlign;
typedef enum {
PANGO_ELLIPSIZE_NONE,
PANGO_ELLIPSIZE_START,
PANGO_ELLIPSIZE_MIDDLE,
PANGO_ELLIPSIZE_END
} PangoEllipsizeMode;
typedef struct GSList {
gpointer data;
struct GSList *next;
} GSList;
typedef struct {
const PangoAttrClass *klass;
guint start_index;
guint end_index;
} PangoAttribute;
typedef struct {
PangoLayout *layout;
gint start_index;
gint length;
GSList *runs;
guint is_paragraph_start : 1;
guint resolved_dir : 3;
} PangoLayoutLine;
typedef struct {
int x;
int y;
int width;
int height;
} PangoRectangle;
typedef struct {
guint is_line_break: 1;
guint is_mandatory_break : 1;
guint is_char_break : 1;
guint is_white : 1;
guint is_cursor_position : 1;
guint is_word_start : 1;
guint is_word_end : 1;
guint is_sentence_boundary : 1;
guint is_sentence_start : 1;
guint is_sentence_end : 1;
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
} PangoLogAttr;
typedef struct {
void *shape_engine;
void *lang_engine;
PangoFont *font;
guint8 level;
guint8 gravity;
guint8 flags;
guint8 script;
PangoLanguage *language;
GSList *extra_attrs;
} PangoAnalysis;
typedef struct {
gint offset;
gint length;
gint num_chars;
PangoAnalysis analysis;
} PangoItem;
typedef struct {
PangoGlyphUnit width;
PangoGlyphUnit x_offset;
PangoGlyphUnit y_offset;
} PangoGlyphGeometry;
typedef struct {
guint is_cluster_start : 1;
} PangoGlyphVisAttr;
typedef struct {
PangoGlyph glyph;
PangoGlyphGeometry geometry;
PangoGlyphVisAttr attr;
} PangoGlyphInfo;
typedef struct {
gint num_glyphs;
PangoGlyphInfo *glyphs;
gint *log_clusters;
} PangoGlyphString;
typedef struct {
PangoItem *item;
PangoGlyphString *glyphs;
} PangoGlyphItem;
int pango_version (void);
double pango_units_to_double (int i);
int pango_units_from_double (double d);
void g_object_unref (gpointer object);
void g_type_init (void);
PangoLayout * pango_layout_new (PangoContext *context);
void pango_layout_set_width (PangoLayout *layout, int width);
PangoAttrList * pango_layout_get_attributes(PangoLayout *layout);
void pango_layout_set_attributes (
PangoLayout *layout, PangoAttrList *attrs);
void pango_layout_set_text (
PangoLayout *layout, const char *text, int length);
void pango_layout_set_tabs (
PangoLayout *layout, PangoTabArray *tabs);
void pango_layout_set_font_description (
PangoLayout *layout, const PangoFontDescription *desc);
void pango_layout_set_wrap (
PangoLayout *layout, PangoWrapMode wrap);
void pango_layout_set_single_paragraph_mode (
PangoLayout *layout, gboolean setting);
int pango_layout_get_baseline (PangoLayout *layout);
PangoLayoutLine * pango_layout_get_line_readonly (
PangoLayout *layout, int line);
hb_font_t * pango_font_get_hb_font (PangoFont *font);
PangoFontDescription * pango_font_description_new (void);
void pango_font_description_free (PangoFontDescription *desc);
PangoFontDescription * pango_font_description_copy (
const PangoFontDescription *desc);
void pango_font_description_set_family (
PangoFontDescription *desc, const char *family);
void pango_font_description_set_style (
PangoFontDescription *desc, PangoStyle style);
PangoStyle pango_font_description_get_style (
const PangoFontDescription *desc);
void pango_font_description_set_stretch (
PangoFontDescription *desc, PangoStretch stretch);
void pango_font_description_set_weight (
PangoFontDescription *desc, PangoWeight weight);
void pango_font_description_set_absolute_size (
PangoFontDescription *desc, double size);
int pango_font_description_get_size (PangoFontDescription *desc);
int pango_glyph_string_get_width (PangoGlyphString *glyphs);
char * pango_font_description_to_string (
const PangoFontDescription *desc);
PangoFontDescription * pango_font_describe (PangoFont *font);
const char * pango_font_description_get_family (
const PangoFontDescription *desc);
int pango_font_description_hash (const PangoFontDescription *desc);
PangoContext * pango_context_new ();
PangoContext * pango_font_map_create_context (PangoFontMap *fontmap);
PangoFontMetrics * pango_context_get_metrics (
PangoContext *context, const PangoFontDescription *desc,
PangoLanguage *language);
void pango_font_metrics_unref (PangoFontMetrics *metrics);
int pango_font_metrics_get_ascent (PangoFontMetrics *metrics);
int pango_font_metrics_get_descent (PangoFontMetrics *metrics);
int pango_font_metrics_get_underline_thickness (
PangoFontMetrics *metrics);
int pango_font_metrics_get_underline_position (
PangoFontMetrics *metrics);
int pango_font_metrics_get_strikethrough_thickness (
PangoFontMetrics *metrics);
int pango_font_metrics_get_strikethrough_position (
PangoFontMetrics *metrics);
void pango_context_set_round_glyph_positions (
PangoContext *context, gboolean round_positions);
PangoFontMetrics * pango_font_get_metrics (
PangoFont *font, PangoLanguage *language);
void pango_font_get_glyph_extents (
PangoFont *font, PangoGlyph glyph, PangoRectangle *ink_rect,
PangoRectangle *logical_rect);
PangoAttrList * pango_attr_list_new (void);
void pango_attr_list_unref (PangoAttrList *list);
void pango_attr_list_insert (
PangoAttrList *list, PangoAttribute *attr);
void pango_attr_list_change (
PangoAttrList *list, PangoAttribute *attr);
PangoAttribute * pango_attr_font_features_new (const gchar *features);
PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing);
void pango_attribute_destroy (PangoAttribute *attr);
PangoTabArray * pango_tab_array_new_with_positions (
gint size, gboolean positions_in_pixels, PangoTabAlign first_alignment,
gint first_position, ...);
void pango_tab_array_free (PangoTabArray *tab_array);
PangoLanguage * pango_language_from_string (const char *language);
PangoLanguage * pango_language_get_default (void);
void pango_context_set_language (
PangoContext *context, PangoLanguage *language);
void pango_context_set_font_map (
PangoContext *context, PangoFontMap *font_map);
void pango_layout_line_get_extents (
PangoLayoutLine *line,
PangoRectangle *ink_rect, PangoRectangle *logical_rect);
PangoContext * pango_layout_get_context (PangoLayout *layout);
void pango_layout_set_ellipsize (
PangoLayout *layout,
PangoEllipsizeMode ellipsize);
void pango_get_log_attrs (
const char *text, int length, int level, PangoLanguage *language,
PangoLogAttr *log_attrs, int attrs_len);
// FontConfig
typedef int FcBool;
typedef struct _FcConfig FcConfig;
typedef struct _FcPattern FcPattern;
typedef struct _FcStrList FcStrList;
typedef unsigned char FcChar8;
typedef enum {
FcResultMatch, FcResultNoMatch, FcResultTypeMismatch, FcResultNoId,
FcResultOutOfMemory
} FcResult;
typedef enum {
FcMatchPattern, FcMatchFont, FcMatchScan
} FcMatchKind;
typedef struct _FcFontSet {
int nfont;
int sfont;
FcPattern **fonts;
} FcFontSet;
typedef enum _FcSetName {
FcSetSystem = 0,
FcSetApplication = 1
} FcSetName;
FcConfig * FcInitLoadConfigAndFonts (void);
void FcConfigDestroy (FcConfig *config);
FcBool FcConfigAppFontAddFile (
FcConfig *config, const FcChar8 *file);
FcConfig * FcConfigGetCurrent (void);
FcBool FcConfigSetCurrent (FcConfig *config);
FcBool FcConfigParseAndLoad (
FcConfig *config, const FcChar8 *file, FcBool complain);
FcFontSet * FcConfigGetFonts(FcConfig *config, FcSetName set);
FcStrList * FcConfigGetConfigFiles(FcConfig *config);
FcChar8 * FcStrListNext(FcStrList *list);
void FcDefaultSubstitute (FcPattern *pattern);
FcBool FcConfigSubstitute (
FcConfig *config, FcPattern *p, FcMatchKind kind);
FcPattern * FcPatternCreate (void);
void FcPatternDestroy (FcPattern *p);
FcBool FcPatternAddString (
FcPattern *p, const char *object, const FcChar8 *s);
FcResult FcPatternGetString (
FcPattern *p, const char *object, int n, FcChar8 **s);
FcPattern * FcFontMatch (
FcConfig *config, FcPattern *p, FcResult *result);
// PangoFT2
typedef ... PangoFcFontMap;
PangoFontMap * pango_ft2_font_map_new (void);
void pango_fc_font_map_set_config (
PangoFcFontMap *fcfontmap, FcConfig *fcconfig);
''')
def _dlopen(ffi, *names):
"""Try various names for the same library, for different platforms."""
for name in names:
try:
return ffi.dlopen(name)
except OSError:
pass
# Re-raise the exception.
return ffi.dlopen(names[0]) # pragma: no cover
# Load the shared libraries. Each call lists the file names under which the
# library may be found depending on the platform; the first one that loads
# is used.
gobject = _dlopen(
    ffi, 'gobject-2.0-0', 'gobject-2.0', 'libgobject-2.0-0',
    'libgobject-2.0.so.0', 'libgobject-2.0.dylib')
pango = _dlopen(
    ffi, 'pango-1.0-0', 'pango-1.0', 'libpango-1.0-0', 'libpango-1.0.so.0',
    'libpango-1.0.dylib')
# 'libharfbuzz.so.0' used to be listed twice, which only caused a useless
# second loading attempt when the library was missing.
harfbuzz = _dlopen(
    ffi, 'harfbuzz', 'harfbuzz-0.0', 'libharfbuzz-0',
    'libharfbuzz.so.0', 'libharfbuzz.0.dylib')
fontconfig = _dlopen(
    ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig-1.dll',
    'libfontconfig.so.1', 'libfontconfig-1.dylib')
pangoft2 = _dlopen(
    ffi, 'pangoft2-1.0-0', 'pangoft2-1.0', 'libpangoft2-1.0-0',
    'libpangoft2-1.0.so.0', 'libpangoft2-1.0.dylib')

# NOTE(review): g_type_init() is presumably only required by old GLib
# versions -- confirm before removing.
gobject.g_type_init()

# Shortcuts for the Pango unit conversion functions.
units_to_double = pango.pango_units_to_double
units_from_double = pango.pango_units_from_double
def unicode_to_char_p(string):
    """Encode ``string`` for C and return ``(pointer, bytestring)``.

    The string is encoded as UTF-8 and its NUL bytes are dropped before a
    ``char[]`` is allocated from it.

    The byte string must live at least as long as the pointer is used.

    """
    encoded = string.encode('utf-8')
    bytestring = encoded.replace(b'\x00', b'')
    pointer = ffi.new('char[]', bytestring)
    return pointer, bytestring

View File

@ -1,6 +1,6 @@
"""
weasyprint.fonts
----------------
weasyprint.text.fonts
---------------------
Interface with external libraries managing fonts installed on the system.
@ -15,115 +15,12 @@ import warnings
from fontTools.ttLib import TTFont, woff2
from .logger import LOGGER
from .text import dlopen, ffi, get_font_features, gobject
from .urls import FILESYSTEM_ENCODING, fetch
fontconfig = dlopen(
ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig-1.dll',
'libfontconfig.so.1', 'libfontconfig-1.dylib')
pangoft2 = dlopen(
ffi, 'pangoft2-1.0-0', 'pangoft2-1.0', 'libpangoft2-1.0-0',
'libpangoft2-1.0.so', 'libpangoft2-1.0.dylib')
ffi.cdef('''
// FontConfig
typedef int FcBool;
typedef struct _FcConfig FcConfig;
typedef struct _FcPattern FcPattern;
typedef struct _FcStrList FcStrList;
typedef unsigned char FcChar8;
typedef enum {
FcResultMatch, FcResultNoMatch, FcResultTypeMismatch, FcResultNoId,
FcResultOutOfMemory
} FcResult;
typedef enum {
FcMatchPattern, FcMatchFont, FcMatchScan
} FcMatchKind;
typedef struct _FcFontSet {
int nfont;
int sfont;
FcPattern **fonts;
} FcFontSet;
typedef enum _FcSetName {
FcSetSystem = 0,
FcSetApplication = 1
} FcSetName;
FcConfig * FcInitLoadConfigAndFonts (void);
void FcConfigDestroy (FcConfig *config);
FcBool FcConfigAppFontAddFile (
FcConfig *config, const FcChar8 *file);
FcConfig * FcConfigGetCurrent (void);
FcBool FcConfigSetCurrent (FcConfig *config);
FcBool FcConfigParseAndLoad (
FcConfig *config, const FcChar8 *file, FcBool complain);
FcFontSet * FcConfigGetFonts(FcConfig *config, FcSetName set);
FcStrList * FcConfigGetConfigFiles(FcConfig *config);
FcChar8 * FcStrListNext(FcStrList *list);
void FcDefaultSubstitute (FcPattern *pattern);
FcBool FcConfigSubstitute (
FcConfig *config, FcPattern *p, FcMatchKind kind);
FcPattern * FcPatternCreate (void);
FcPattern * FcPatternDestroy (FcPattern *p);
FcBool FcPatternAddString (
FcPattern *p, const char *object, const FcChar8 *s);
FcResult FcPatternGetString (
FcPattern *p, const char *object, int n, FcChar8 **s);
FcPattern * FcFontMatch (
FcConfig *config, FcPattern *p, FcResult *result);
// PangoFT2
typedef ... PangoFcFontMap;
PangoFontMap * pango_ft2_font_map_new (void);
void pango_fc_font_map_set_config (
PangoFcFontMap *fcfontmap, FcConfig *fcconfig);
''')
FONTCONFIG_WEIGHT_CONSTANTS = {
'normal': 'normal',
'bold': 'bold',
100: 'thin',
200: 'extralight',
300: 'light',
400: 'normal',
500: 'medium',
600: 'demibold',
700: 'bold',
800: 'extrabold',
900: 'black',
}
FONTCONFIG_STYLE_CONSTANTS = {
'normal': 'roman',
'italic': 'italic',
'oblique': 'oblique',
}
FONTCONFIG_STRETCH_CONSTANTS = {
'normal': 'normal',
'ultra-condensed': 'ultracondensed',
'extra-condensed': 'extracondensed',
'condensed': 'condensed',
'semi-condensed': 'semicondensed',
'semi-expanded': 'semiexpanded',
'expanded': 'expanded',
'extra-expanded': 'extraexpanded',
'ultra-expanded': 'ultraexpanded',
}
from ..logger import LOGGER
from ..urls import FILESYSTEM_ENCODING, fetch
from .constants import (
CAPS_KEYS, EAST_ASIAN_KEYS, FONTCONFIG_STRETCH, FONTCONFIG_STYLE,
FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS)
from .ffi import ffi, fontconfig, gobject, pangoft2
def _check_font_configuration(font_config):
@ -317,15 +214,14 @@ class FontConfiguration:
LOGGER.debug(
'Failed to load font at %r (%s)', url, exc)
continue
font_features = {
features = {
rules[0][0].replace('-', '_'): rules[0][1] for rules in
rule_descriptors.get('font_variant', [])}
if 'font_feature_settings' in rule_descriptors:
font_features['font_feature_settings'] = (
features['font_feature_settings'] = (
rule_descriptors['font_feature_settings'])
features_string = ''
for key, value in get_font_features(
**font_features).items():
for key, value in font_features(**features).items():
features_string += f'<string>{key} {value}</string>'
fd = tempfile.NamedTemporaryFile(
'wb', dir=self._tempdir, delete=False)
@ -333,11 +229,11 @@ class FontConfiguration:
fd.write(font)
fd.close()
self._filenames.append(font_filename)
fontconfig_style = FONTCONFIG_STYLE_CONSTANTS[
fontconfig_style = FONTCONFIG_STYLE[
rule_descriptors.get('font_style', 'normal')]
fontconfig_weight = FONTCONFIG_WEIGHT_CONSTANTS[
fontconfig_weight = FONTCONFIG_WEIGHT[
rule_descriptors.get('font_weight', 'normal')]
fontconfig_stretch = FONTCONFIG_STRETCH_CONSTANTS[
fontconfig_stretch = FONTCONFIG_STRETCH[
rule_descriptors.get('font_stretch', 'normal')]
xml = f'''<?xml version="1.0"?>
<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
@ -400,3 +296,75 @@ class FontConfiguration:
os.remove(filename)
except OSError:
continue
def font_features(font_kerning='normal', font_variant_ligatures='normal',
                  font_variant_position='normal', font_variant_caps='normal',
                  font_variant_numeric='normal',
                  font_variant_alternates='normal',
                  font_variant_east_asian='normal',
                  font_feature_settings='normal'):
    """Build the OpenType feature mapping described by CSS font properties.

    Return a dict whose keys are OpenType feature tags and whose values are
    the feature values. Entries are added in the order of the precedence
    rules, later rules overriding earlier ones.

    See https://www.w3.org/TR/css-fonts-3/#feature-precedence

    """
    features = {}

    # Step 1: getting the default, we rely on Pango for this
    # Step 2: @font-face font-variant, done in fonts.add_font_face
    # Step 3: @font-face font-feature-settings, done in fonts.add_font_face

    # Step 4: font-variant and OpenType features

    if font_kerning != 'auto':
        # 'normal' turns kerning on, any other value turns it off.
        features['kern'] = 1 if font_kerning == 'normal' else 0

    if font_variant_ligatures == 'none':
        # Disable every known ligature feature.
        features.update(
            (key, 0) for keys in LIGATURE_KEYS.values() for key in keys)
    elif font_variant_ligatures != 'normal':
        for ligature_type in font_variant_ligatures:
            # A "no-" prefix disables the ligature type named after it.
            disabled = ligature_type.startswith('no-')
            if disabled:
                ligature_type = ligature_type[3:]
            features.update(dict.fromkeys(
                LIGATURE_KEYS[ligature_type], int(not disabled)))

    if font_variant_position == 'sub':
        # TODO: the specification asks for additional checks
        # https://www.w3.org/TR/css-fonts-3/#font-variant-position-prop
        features['subs'] = 1
    elif font_variant_position == 'super':
        features['sups'] = 1

    if font_variant_caps != 'normal':
        # TODO: the specification asks for additional checks
        # https://www.w3.org/TR/css-fonts-3/#font-variant-caps-prop
        features.update(dict.fromkeys(CAPS_KEYS[font_variant_caps], 1))

    if font_variant_numeric != 'normal':
        for numeric_value in font_variant_numeric:
            features[NUMERIC_KEYS[numeric_value]] = 1

    # TODO: support other values of font-variant-alternates
    # See https://www.w3.org/TR/css-fonts-3/#font-variant-alternates-prop
    if font_variant_alternates == 'historical-forms':
        features['hist'] = 1

    if font_variant_east_asian != 'normal':
        for east_asian_value in font_variant_east_asian:
            features[EAST_ASIAN_KEYS[east_asian_value]] = 1

    # Step 5: incompatible non-OpenType features, already handled by Pango

    # Step 6: font-feature-settings
    if font_feature_settings != 'normal':
        for tag, value in dict(font_feature_settings).items():
            features[tag] = value

    return features

View File

@ -0,0 +1,610 @@
"""
weasyprint.text.line_break
--------------------------
Decide where to break text lines.
"""
import re
import pyphen
from ..logger import LOGGER
from .constants import LST_TO_ISO, PANGO_STRETCH, PANGO_STYLE, PANGO_WRAP_MODE
from .ffi import (
ffi, gobject, pango, pangoft2, unicode_to_char_p, units_from_double,
units_to_double)
from .fonts import font_features
def line_size(line, style):
    """Return the logical ``(width, height)`` of the given Pango ``line``.

    Both values are converted from Pango units. When the ``letter_spacing``
    of ``style`` is not ``'normal'``, its value is added to the width.

    """
    extents = ffi.new('PangoRectangle *')
    # Only the logical rectangle is needed, the ink rectangle is not asked.
    pango.pango_layout_line_get_extents(line, ffi.NULL, extents)
    size = [units_to_double(extents.width), units_to_double(extents.height)]
    ffi.release(extents)
    letter_spacing = style['letter_spacing']
    if letter_spacing != 'normal':
        size[0] += letter_spacing
    return size[0], size[1]
def first_line_metrics(first_line, text, layout, resume_at, space_collapse,
                       style, hyphenated=False, hyphenation_character=None):
    """Compute the final metrics of a first line that has been split.

    Return ``(layout, length, resume_at, width, height, baseline)``, where
    ``length`` is a number of UTF-8 bytes and ``resume_at`` is returned
    unchanged.

    When ``hyphenated`` is true, the UTF-8 length of
    ``hyphenation_character`` is subtracted from the length of the line.
    Otherwise, when ``resume_at`` is set, the layout is filled again with the
    text of the first line only, without its soft hyphens.

    The layout is deactivated before it is returned.

    """
    length = first_line.length
    if hyphenated:
        # The hyphenation character is in the layout but not in ``text``.
        length -= len(hyphenation_character.encode('utf8'))
    elif resume_at:
        # Set an infinite width as we don't want to break lines when drawing,
        # the lines have already been split and the size may differ. Rendering
        # is also much faster when no width is set.
        pango.pango_layout_set_width(layout.layout, -1)

        # Create layout with final text
        first_line_text = text.encode('utf-8')[:length].decode('utf-8')

        # Remove trailing spaces if spaces collapse
        if space_collapse:
            first_line_text = first_line_text.rstrip(' ')

        # Remove soft hyphens
        layout.set_text(first_line_text.replace('\u00ad', ''))

        first_line, _ = layout.get_first_line()
        length = first_line.length if first_line is not None else 0

        if '\u00ad' in first_line_text:
            # The soft hyphens removed from the layout are still in ``text``:
            # add their bytes back to the length.
            soft_hyphens = 0
            if first_line_text[0] == '\u00ad':
                length += 2  # len('\u00ad'.encode('utf8'))
            for i in range(len(layout.text)):
                # Count the soft hyphens following the i-th kept character
                while i + soft_hyphens + 1 < len(first_line_text):
                    if first_line_text[i + soft_hyphens + 1] == '\u00ad':
                        soft_hyphens += 1
                    else:
                        break
            length += soft_hyphens * 2  # len('\u00ad'.encode('utf8'))

    width, height = line_size(first_line, style)
    baseline = units_to_double(pango.pango_layout_get_baseline(layout.layout))
    layout.deactivate()
    return layout, length, resume_at, width, height, baseline
class Layout:
    """Object holding PangoLayout-related cdata pointers."""

    def __init__(self, context, font_size, style, justification_spacing=0,
                 max_width=None):
        """Create the Pango objects needed to lay out text with ``style``.

        :param context: layout context, or ``None`` to use a new font map.
        :param font_size: absolute font size given to Pango.
        :param style: a style dict of computed values.
        :param justification_spacing:
            extra word spacing added when the text is set with
            ``justify=True``.
        :param max_width: stored as-is on the instance.

        """
        self.justification_spacing = justification_spacing
        self.setup(context, font_size, style)
        self.max_width = max_width

    def setup(self, context, font_size, style):
        """(Re)create the Pango layout, font description and attributes."""
        self.context = context
        self.style = style
        self.first_line_direction = 0

        if context is None:
            font_map = ffi.gc(
                pangoft2.pango_ft2_font_map_new(), gobject.g_object_unref)
        else:
            font_map = context.font_config.font_map
        pango_context = ffi.gc(
            pango.pango_font_map_create_context(font_map),
            gobject.g_object_unref)
        pango.pango_context_set_round_glyph_positions(pango_context, False)
        self.layout = ffi.gc(
            pango.pango_layout_new(pango_context),
            gobject.g_object_unref)

        if style['font_language_override'] != 'normal':
            lang_p, lang = unicode_to_char_p(LST_TO_ISO.get(
                style['font_language_override'].lower(),
                style['font_language_override']))
        elif style['lang']:
            lang_p, lang = unicode_to_char_p(style['lang'])
        else:
            lang = None
        if lang:
            self.language = pango.pango_language_from_string(lang_p)
            pango.pango_context_set_language(pango_context, self.language)
        else:
            # Also reached when the language string encodes to nothing, so
            # that ``self.language`` is always defined.
            self.language = pango.pango_language_get_default()

        assert not isinstance(style['font_family'], str), (
            'font_family should be a list')
        self.font = ffi.gc(
            pango.pango_font_description_new(),
            pango.pango_font_description_free)
        family_p, family = unicode_to_char_p(','.join(style['font_family']))
        pango.pango_font_description_set_family(self.font, family_p)
        pango.pango_font_description_set_style(
            self.font, PANGO_STYLE[style['font_style']])
        pango.pango_font_description_set_stretch(
            self.font, PANGO_STRETCH[style['font_stretch']])
        pango.pango_font_description_set_weight(
            self.font, style['font_weight'])
        pango.pango_font_description_set_absolute_size(
            self.font, units_from_double(font_size))
        pango.pango_layout_set_font_description(self.layout, self.font)

        text_decoration = style['text_decoration_line']
        if text_decoration != 'none':
            metrics = ffi.gc(
                pango.pango_context_get_metrics(
                    pango_context, self.font, self.language),
                pango.pango_font_metrics_unref)
            self.ascent = units_to_double(
                pango.pango_font_metrics_get_ascent(metrics))
            self.underline_position = units_to_double(
                pango.pango_font_metrics_get_underline_position(metrics))
            self.strikethrough_position = units_to_double(
                pango.pango_font_metrics_get_strikethrough_position(metrics))
            self.underline_thickness = units_to_double(
                pango.pango_font_metrics_get_underline_thickness(metrics))
            self.strikethrough_thickness = units_to_double(
                pango.pango_font_metrics_get_strikethrough_thickness(metrics))
        else:
            # Reset every decoration metric, so that none is missing or
            # left over from a previous setup with another style.
            self.ascent = None
            self.underline_position = None
            self.strikethrough_position = None
            self.underline_thickness = None
            self.strikethrough_thickness = None

        features = font_features(
            style['font_kerning'], style['font_variant_ligatures'],
            style['font_variant_position'], style['font_variant_caps'],
            style['font_variant_numeric'], style['font_variant_alternates'],
            style['font_variant_east_asian'], style['font_feature_settings'])
        if features and context:
            features = ','.join(
                f'{key} {value}' for key, value in features.items())

            # TODO: attributes should be freed.
            # In the meantime, keep a cache to avoid leaking too many of them.
            attr = context.font_features.get(features)
            if attr is None:
                try:
                    attr = pango.pango_attr_font_features_new(
                        features.encode('ascii'))
                except AttributeError:
                    LOGGER.error(
                        'OpenType features are not available '
                        'with Pango < 1.38')
                else:
                    context.font_features[features] = attr
            if attr is not None:
                attr_list = pango.pango_attr_list_new()
                pango.pango_attr_list_insert(attr_list, attr)
                pango.pango_layout_set_attributes(self.layout, attr_list)

    def get_first_line(self):
        """Return ``(first_line, index)``.

        ``index`` is the start index of the second line, or ``None`` when the
        layout has no second line.

        """
        first_line = pango.pango_layout_get_line_readonly(self.layout, 0)
        second_line = pango.pango_layout_get_line_readonly(self.layout, 1)
        if second_line != ffi.NULL:
            index = second_line.start_index
        else:
            index = None
        self.first_line_direction = first_line.resolved_dir
        return first_line, index

    def set_text(self, text, justify=False):
        """Set the text of the layout, with its spacing and tab attributes."""
        try:
            # Keep only the first line plus one character, we don't need more
            text = text[:text.index('\n') + 2]
        except ValueError:
            # End-of-line not found, keep the whole text
            pass
        text, bytestring = unicode_to_char_p(text)
        self.text = bytestring.decode('utf-8')
        pango.pango_layout_set_text(self.layout, text, -1)

        # Word spacing may not be set if we're trying to get word-spacing
        # computed value using a layout, for example if its unit is ex.
        word_spacing = self.style.get('word_spacing', 0)
        if justify:
            # Justification is needed when drawing text but is useless during
            # layout. Ignore it before layout is reactivated before the drawing
            # step.
            word_spacing += self.justification_spacing

        # Letter spacing may not be set if we're trying to get letter-spacing
        # computed value using a layout, for example if its unit is ex.
        letter_spacing = self.style.get('letter_spacing', 'normal')
        if letter_spacing == 'normal':
            letter_spacing = 0

        if text and (word_spacing != 0 or letter_spacing != 0):
            letter_spacing = units_from_double(letter_spacing)
            space_spacing = units_from_double(word_spacing) + letter_spacing
            attr_list = pango.pango_layout_get_attributes(self.layout)
            if not attr_list:
                # TODO: list should be freed
                attr_list = pango.pango_attr_list_new()

            def add_attr(start, end, spacing):
                # TODO: attributes should be freed
                attr = pango.pango_attr_letter_spacing_new(spacing)
                attr.start_index, attr.end_index = start, end
                pango.pango_attr_list_change(attr_list, attr)

            # Letter spacing applies to the whole text, spaces additionally
            # get the word spacing.
            add_attr(0, len(bytestring), letter_spacing)
            position = bytestring.find(b' ')
            while position != -1:
                add_attr(position, position + 1, space_spacing)
                position = bytestring.find(b' ', position + 1)

            pango.pango_layout_set_attributes(self.layout, attr_list)

        # Tabs width
        if b'\t' in bytestring:
            self.set_tabs()

    def set_tabs(self):
        """Set the tab stop of the layout from the ``tab_size`` style."""
        if isinstance(self.style['tab_size'], int):
            # An integer is a number of spaces: measure them.
            layout = Layout(
                self.context, self.style['font_size'], self.style,
                self.justification_spacing)
            layout.set_text(' ' * self.style['tab_size'])
            line, _ = layout.get_first_line()
            width, _ = line_size(line, self.style)
            width = int(round(width))
        else:
            width = int(self.style['tab_size'].value)
        # 0 is not handled correctly by Pango
        array = ffi.gc(
            pango.pango_tab_array_new_with_positions(
                1, True, pango.PANGO_TAB_LEFT, width or 1),
            pango.pango_tab_array_free)
        pango.pango_layout_set_tabs(self.layout, array)

    def deactivate(self):
        """Drop the references to the Pango objects and to the style."""
        del self.layout, self.font, self.language, self.style

    def reactivate(self, style):
        """Set up the layout again with ``style`` and the stored text."""
        self.setup(self.context, style['font_size'], style)
        self.set_text(self.text, justify=True)
def create_layout(text, style, context, max_width, justification_spacing):
    """Build a :class:`Layout` holding ``text`` with default line breaks.

    :param text: Unicode
    :param style: a style dict of computed values
    :param max_width:
        The maximum available width in the same unit as ``style['font_size']``,
        or ``None`` for unlimited width.

    """
    layout = Layout(
        context, style['font_size'], style, justification_spacing, max_width)

    wrapping = style['white_space'] in ('normal', 'pre-wrap', 'pre-line')
    # max_width * Pango.SCALE == max_width * 1024 has to fit in a signed
    # integer: bigger values are handled like None, as unconstrained width.
    constrained = (
        max_width is not None and wrapping and max_width < 2 ** 21)
    if constrained:
        pango_width = units_from_double(max(0, max_width))
        pango.pango_layout_set_width(layout.layout, pango_width)

    layout.set_text(text)
    return layout
def split_first_line(text, style, context, max_width, justification_spacing,
                     minimum=False):
    """Fit as much as possible in the available width for one line of text.

    :param text: the text to split.
    :param style: a style dict of computed values.
    :param context: layout context given to the created layouts.
    :param max_width:
        the available width, ignored when the ``white_space`` value of the
        style does not allow wrapping.
    :param justification_spacing: given as-is to the created layouts.
    :param minimum:
        when true, too long words are not broken by the ``break-word`` value
        of ``overflow_wrap`` (step 5).

    Return ``(layout, length, resume_at, width, height, baseline)``.

    ``layout``: a pango Layout with the first line
    ``length``: length in UTF-8 bytes of the first line
    ``resume_at``: The number of UTF-8 bytes to skip for the next line.
                   May be ``None`` if the whole text fits in one line.
                   This may be greater than ``length`` in case of preserved
                   newline characters.
    ``width``: width in pixels of the first line
    ``height``: height in pixels of the first line
    ``baseline``: baseline in pixels of the first line

    """
    # See https://www.w3.org/TR/css-text-3/#white-space-property
    text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line')
    space_collapse = style['white_space'] in ('normal', 'nowrap', 'pre-line')

    original_max_width = max_width
    if not text_wrap:
        max_width = None

    # Step #1: Get a draft layout with the first line
    layout = None
    if (max_width is not None and max_width != float('inf') and
            style['font_size']):
        if max_width == 0:
            # Trying to find minimum size, let's naively split on spaces and
            # keep one word + one letter
            space_index = text.find(' ')
            if space_index == -1:
                expected_length = len(text)
            else:
                expected_length = space_index + 2  # index + space + one letter
        else:
            # Rough guess of the number of characters fitting in the width
            expected_length = int(max_width / style['font_size'] * 2.5)
        if expected_length < len(text):
            # Try to use a small amount of text instead of the whole text
            layout = create_layout(
                text[:expected_length], style, context, max_width,
                justification_spacing)
            first_line, index = layout.get_first_line()
            if index is None:
                # The small amount of text fits in one line, give up and use
                # the whole text
                layout = None
    if layout is None:
        layout = create_layout(
            text, style, context, original_max_width, justification_spacing)
        first_line, index = layout.get_first_line()
    resume_at = index

    # Step #2: Don't split lines when it's not needed
    if max_width is None:
        # The first line can take all the place needed
        return first_line_metrics(
            first_line, text, layout, resume_at, space_collapse, style)
    first_line_width, _ = line_size(first_line, style)
    if index is None and first_line_width <= max_width:
        # The first line fits in the available width
        return first_line_metrics(
            first_line, text, layout, resume_at, space_collapse, style)

    # Step #3: Try to put the first word of the second line on the first line
    # https://mail.gnome.org/archives/gtk-i18n-list/2013-September/msg00006
    # is a good thread related to this problem.
    first_line_text = text.encode('utf-8')[:index].decode('utf-8')
    # We can't rely on first_line_width, see
    # https://github.com/Kozea/WeasyPrint/issues/1051
    first_line_fits = (
        first_line_width <= max_width or
        ' ' in first_line_text.strip() or
        can_break_text(first_line_text.strip(), style['lang']))
    if first_line_fits:
        # The first line fits but may have been cut too early by Pango
        second_line_text = text.encode('utf-8')[index:].decode('utf-8')
    else:
        # The line can't be split earlier, try to hyphenate the first word.
        first_line_text = ''
        second_line_text = text

    next_word = second_line_text.split(' ', 1)[0]
    if next_word:
        if space_collapse:
            # next_word might fit without a space afterwards
            # only try when space collapsing is allowed
            new_first_line_text = first_line_text + next_word
            layout.set_text(new_first_line_text)
            first_line, index = layout.get_first_line()
            first_line_width, _ = line_size(first_line, style)
            if index is None and first_line_text:
                # The next word fits in the first line, keep the layout
                resume_at = len(new_first_line_text.encode('utf-8')) + 1
                return first_line_metrics(
                    first_line, text, layout, resume_at, space_collapse, style)
            elif index:
                # Text may have been split elsewhere by Pango earlier
                resume_at = index
            else:
                # Second line is none
                resume_at = first_line.length + 1
                if resume_at >= len(text.encode('utf-8')):
                    resume_at = None
    elif first_line_text:
        # We found something on the first line but we did not find a word on
        # the next line, no need to hyphenate, we can keep the current layout
        return first_line_metrics(
            first_line, text, layout, resume_at, space_collapse, style)

    # Step #4: Try to hyphenate
    hyphens = style['hyphens']
    lang = style['lang'] and pyphen.language_fallback(style['lang'])
    total, left, right = style['hyphenate_limit_chars']
    hyphenated = False
    soft_hyphen = '\u00ad'

    try_hyphenate = False
    if hyphens != 'none':
        next_word_boundaries = get_next_word_boundaries(second_line_text, lang)
        if next_word_boundaries:
            # We have a word to hyphenate
            start_word, stop_word = next_word_boundaries
            next_word = second_line_text[start_word:stop_word]
            if stop_word - start_word >= total:
                # This word is long enough
                first_line_width, _ = line_size(first_line, style)
                space = max_width - first_line_width
                if style['hyphenate_limit_zone'].unit == '%':
                    limit_zone = (
                        max_width * style['hyphenate_limit_zone'].value / 100.)
                else:
                    limit_zone = style['hyphenate_limit_zone'].value
                if space > limit_zone or space < 0:
                    # Available space is worth the try, or the line is even too
                    # long to fit: try to hyphenate
                    try_hyphenate = True

    if try_hyphenate:
        # Automatic hyphenation possible and next word is long enough
        auto_hyphenation = hyphens == 'auto' and lang
        manual_hyphenation = False
        if auto_hyphenation:
            if soft_hyphen in first_line_text or soft_hyphen in next_word:
                # Automatic hyphenation opportunities within a word must be
                # ignored if the word contains a conditional hyphen, in favor
                # of the conditional hyphen(s).
                # See https://drafts.csswg.org/css-text-3/#valdef-hyphens-auto
                manual_hyphenation = True
        else:
            manual_hyphenation = hyphens == 'manual'

        if manual_hyphenation:
            # Manual hyphenation: check that the line ends with a soft
            # hyphen and add the missing hyphen
            if first_line_text.endswith(soft_hyphen):
                # The first line has been split on a soft hyphen
                if ' ' in first_line_text:
                    first_line_text, next_word = (
                        first_line_text.rsplit(' ', 1))
                    next_word = f' {next_word}'
                    layout.set_text(first_line_text)
                    first_line, index = layout.get_first_line()
                    resume_at = len((first_line_text + ' ').encode('utf8'))
                else:
                    first_line_text, next_word = '', first_line_text
            # Candidate word parts, from the longest to the shortest, each
            # one ending with a soft hyphen
            soft_hyphen_indexes = [
                match.start() for match in re.finditer(soft_hyphen, next_word)]
            soft_hyphen_indexes.reverse()
            dictionary_iterations = [
                next_word[:i + 1] for i in soft_hyphen_indexes]
        elif auto_hyphenation:
            # Hyphenation dictionaries are cached on the context
            dictionary_key = (lang, left, right, total)
            dictionary = context.dictionaries.get(dictionary_key)
            if dictionary is None:
                dictionary = pyphen.Pyphen(lang=lang, left=left, right=right)
                context.dictionaries[dictionary_key] = dictionary
            dictionary_iterations = [
                start for start, end in dictionary.iterate(next_word)]
        else:
            dictionary_iterations = []

        if dictionary_iterations:
            for first_word_part in dictionary_iterations:
                new_first_line_text = (
                    first_line_text +
                    second_line_text[:start_word] +
                    first_word_part)
                hyphenated_first_line_text = (
                    new_first_line_text + style['hyphenate_character'])
                new_layout = create_layout(
                    hyphenated_first_line_text, style, context, max_width,
                    justification_spacing)
                new_first_line, new_index = new_layout.get_first_line()
                new_first_line_width, _ = line_size(new_first_line, style)
                new_space = max_width - new_first_line_width
                # Keep the first part that stays on one line and fits; the
                # last part is kept even when it is too wide
                if new_index is None and (
                        new_space >= 0 or
                        first_word_part == dictionary_iterations[-1]):
                    hyphenated = True
                    layout = new_layout
                    first_line = new_first_line
                    index = new_index
                    resume_at = len(new_first_line_text.encode('utf8'))
                    # NOTE(review): presumably new_first_line_text is always
                    # shorter than text here -- confirm, an IndexError is
                    # raised otherwise
                    if text[len(new_first_line_text)] == soft_hyphen:
                        # Recreate the layout with no max_width to be sure that
                        # we don't break before the soft hyphen
                        pango.pango_layout_set_width(
                            layout.layout, units_from_double(-1))
                        resume_at += len(soft_hyphen.encode('utf8'))
                    break

            if not hyphenated and not first_line_text:
                # Recreate the layout with no max_width to be sure that
                # we don't break before or inside the hyphenate character
                hyphenated = True
                layout.set_text(hyphenated_first_line_text)
                pango.pango_layout_set_width(
                    layout.layout, units_from_double(-1))
                first_line, index = layout.get_first_line()
                resume_at = len(new_first_line_text.encode('utf8'))
                if text[len(first_line_text)] == soft_hyphen:
                    resume_at += len(soft_hyphen.encode('utf8'))

    if not hyphenated and first_line_text.endswith(soft_hyphen):
        # Recreate the layout with no max_width to be sure that
        # we don't break inside the hyphenate-character string
        hyphenated = True
        hyphenated_first_line_text = (
            first_line_text + style['hyphenate_character'])
        layout.set_text(hyphenated_first_line_text)
        pango.pango_layout_set_width(
            layout.layout, units_from_double(-1))
        first_line, index = layout.get_first_line()
        resume_at = len(first_line_text.encode('utf8'))

    # Step 5: Try to break word if it's too long for the line
    overflow_wrap = style['overflow_wrap']
    first_line_width, _ = line_size(first_line, style)
    space = max_width - first_line_width
    # If we can break words and the first line is too long
    if not minimum and overflow_wrap == 'break-word' and space < 0:
        # Is it really OK to remove hyphenation for word-break ?
        hyphenated = False
        # TODO: Modify code to preserve W3C condition:
        # "Shaping characters are still shaped as if the word were not broken"
        # The way new lines are processed in this function (one by one with no
        # memory of the last) prevents shaping characters (arabic, for
        # instance) from keeping their shape when wrapped on the next line with
        # pango layout. Maybe insert Unicode shaping characters in text?
        layout.set_text(text)
        pango.pango_layout_set_width(
            layout.layout, units_from_double(max_width))
        pango.pango_layout_set_wrap(
            layout.layout, PANGO_WRAP_MODE['WRAP_CHAR'])
        first_line, index = layout.get_first_line()
        resume_at = index or first_line.length
        if resume_at >= len(text.encode('utf-8')):
            resume_at = None

    return first_line_metrics(
        first_line, text, layout, resume_at, space_collapse, style, hyphenated,
        style['hyphenate_character'])
def get_log_attrs(text, lang):
    """Return ``(bytestring, log_attrs)`` for ``text``.

    ``bytestring`` is the UTF-8 encoded text given to Pango, without the
    bidi control characters that are removed from ``text``. ``log_attrs`` is
    the ``PangoLogAttr`` array filled by ``pango_get_log_attrs``.

    :param text: the text to analyze.
    :param lang:
        the language of the text, the default Pango language is used when it
        is empty.

    """
    language = None
    if lang:
        lang_p, lang_bytes = unicode_to_char_p(lang)
        if lang_bytes:
            language = pango.pango_language_from_string(lang_p)
    if language is None:
        # No language, or a language whose encoded form is empty: ``language``
        # used to be left undefined in the latter case.
        language = pango.pango_language_get_default()

    # TODO: this should be removed when bidi is supported
    for char in ('\u202a', '\u202b', '\u202c', '\u202d', '\u202e'):
        text = text.replace(char, '')

    text_p, bytestring = unicode_to_char_p(text)
    # One attribute for each character, plus one for the end of the text
    length = len(text) + 1
    log_attrs = ffi.new('PangoLogAttr[]', length)
    pango.pango_get_log_attrs(
        text_p, len(bytestring), -1, language, log_attrs, length)
    return bytestring, log_attrs
def can_break_text(text, lang):
    """Tell whether a line break is allowed inside ``text``.

    Return ``None`` when ``text`` has less than two characters. Otherwise
    return whether a position strictly between the first and the last
    analyzed character is a line break opportunity.

    """
    if not text or len(text) < 2:
        return None
    bytestring, log_attrs = get_log_attrs(text, lang)
    # Count the characters that have really been analyzed: some characters
    # of ``text`` are removed by ``get_log_attrs``, so ``len(text)`` may be
    # too large and reach the end-of-text attribute or overrun the array.
    length = len(bytestring.decode('utf-8')) + 1
    return any(attr.is_line_break for attr in log_attrs[1:length - 1])
def get_next_word_boundaries(text, lang):
    """Return ``(word_start, word_end)`` for the first word end in ``text``.

    ``word_end`` is the first position flagged as a word end, and
    ``word_start`` the last word boundary seen before it. Return ``None``
    when ``text`` has less than two characters or when no word end is found.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    for position, attr in enumerate(log_attrs):
        if attr.is_word_end:
            return word_start, position
        if attr.is_word_boundary:
            word_start = position
    return None
def get_last_word_end(text, lang):
    """Return the position of the last word end found before the text end.

    The attribute of the end of the text itself is ignored. Return ``None``
    when ``text`` has less than two characters or when no word end is found.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    attrs = list(log_attrs)
    # Walk backwards, starting just before the end-of-text attribute.
    for offset in range(1, len(attrs)):
        if attrs[-1 - offset].is_word_end:
            return len(text) - offset
    return None

View File

@ -95,9 +95,9 @@ def path2url(path):
if path.startswith('///'):
# On Windows pathname2url(r'C:\foo') is apparently '///C:/foo'
# That enough slashes already.
return 'file:' + path
return f'file:{path}'
else:
return 'file://' + path
return f'file://{path}'
def url_is_absolute(url):
@ -135,7 +135,7 @@ def url_join(base_url, url, allow_relative, context, context_args):
return iri_to_uri(url)
else:
LOGGER.error(
'Relative URI reference without a base URI: ' + context,
f'Relative URI reference without a base URI: {context}',
*context_args)
return None
@ -175,15 +175,12 @@ def default_url_fetcher(url, timeout=10, ssl_context=None):
"""Fetch an external resource such as an image or stylesheet.
Another callable with the same signature can be given as the
:obj:`url_fetcher` argument to :class:`HTML` or :class:`CSS`.
(See :ref:`url-fetchers`.)
``url_fetcher`` argument to :class:`HTML` or :class:`CSS`.
(See :ref:`URL Fetchers`.)
:type url: str
:param url: The URL of the resource to fetch.
:type timeout: int
:param timeout: The number of seconds before HTTP requests are dropped.
:type ssl_context: ssl.SSLContext
:param ssl_context: An SSL context used for HTTP requests.
:param str url: The URL of the resource to fetch.
:param int timeout: The number of seconds before HTTP requests are dropped.
:param ssl.SSLContext ssl_context: An SSL context used for HTTP requests.
:raises: An exception indicating failure, e.g. :obj:`ValueError` on
syntactically invalid URL.
:returns: A :obj:`dict` with the following keys: