diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1c8b0ccf..22386940 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest, windows-latest] python-version: [3.6, 3.7, 3.8, 3.9, pypy3] exclude: # Pillow wheel missing for this configuration @@ -22,9 +22,9 @@ jobs: - uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - - name: Install DejaVu (Ubuntu) - if: matrix.os == 'ubuntu-20.04' - run: sudo apt-get update -y && sudo apt-get install fonts-dejavu -y + - name: Install DejaVu and Ghostscript (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get update -y && sudo apt-get install fonts-dejavu ghostscript -y - name: Install DejaVu, Pango, libffi and Ghostscript (MacOS) if: matrix.os == 'macos-latest' run: | diff --git a/.gitignore b/.gitignore index 0a264f0c..42ba63ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,22 +1,9 @@ -# Python Bytecode *.pyc - -# Building and Distributing -/*.egg-info -/.eggs +.cache +/.coverage /build /dist /docs/_build - -# Various Tools -/.coverage -/coverage.xml -/htmlcov -/env -/venv -/.vagrant -/.cache - -# Tests -/.pytest_cache +/pytest_cache /tests/test_draw/results +/venv diff --git a/docs/_static/box_model.png b/docs/_static/box_model.png deleted file mode 100644 index 38e4f31f..00000000 Binary files a/docs/_static/box_model.png and /dev/null differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css deleted file mode 100644 index 2981a602..00000000 --- a/docs/_static/custom.css +++ /dev/null @@ -1,24 +0,0 @@ -.wy-nav-content { - height: auto; - min-height: 100%; -} - -@media print { - .wy-grid-for-nav { - height: auto; - position: static; - } -} - -.wy-side-nav-search, .wy-nav-side { - background-color: #14213d; -} - -.wy-side-nav-search input[type=text] { - border-radius: 0; -} - 
-.wy-menu-vertical a:hover { - background: #1ee494; - color: #14213d; -} diff --git a/docs/_static/icon.ico b/docs/_static/icon.ico deleted file mode 100644 index 58438b55..00000000 Binary files a/docs/_static/icon.ico and /dev/null differ diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg deleted file mode 100644 index f1e17a08..00000000 --- a/docs/_static/logo.svg +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index 0da5f1d9..00000000 --- a/docs/api.rst +++ /dev/null @@ -1,69 +0,0 @@ -API -=== - -API stability -------------- - -Everything described here is considered “public”: this is what you can rely -on. We will try to maintain backward-compatibility, and we really often do, but -there is no hard promise. - -Anything else should not be used outside of WeasyPrint itself. We reserve -the right to change it or remove it at any point. Use it at your own risk, -or have dependency to a specific WeasyPrint version. - - -Versioning ----------- - -Since version 43, WeasyPrint only provides major releases and does not follow -semantic versioning. This choice may look odd, but it is close to what many -browsers do, including Firefox and Chrome. - -Even if each version does not break the API, each version does break the way -documents are rendered, which is what really matters at the end. Providing -minor versions would give the illusion that developers can just update -WeasyPrint without checking that everything works. - -Unfortunately, we have the same problem as the other browsers: when a new -version is released, most of the user's websites are rendered exactly the same, -but a small part is not. And the only ways to know that, for web developers, -are to read the changelog and to check that their pages are correctly rendered. - -More about this choice can be found in -`issue #900 `_. - - -.. _command-line-api: - -Command-line API ----------------- - -.. 
autofunction:: weasyprint.__main__.main(argv=sys.argv) - - -.. module:: weasyprint -.. _python-api: - -Python API ----------- - -.. autoclass:: HTML(input, **kwargs) - :members: -.. autoclass:: CSS(input, **kwargs) -.. autoclass:: Attachment(input, **kwargs) -.. autofunction:: default_url_fetcher - -.. module:: weasyprint.document -.. autoclass:: Document - :members: -.. autoclass:: DocumentMetadata() - :members: -.. autoclass:: Page() - :members: - -.. module:: weasyprint.fonts -.. autoclass:: FontConfiguration() - -.. module:: weasyprint.css.counters -.. autoclass:: CounterStyle() diff --git a/docs/features.rst b/docs/api_reference.rst similarity index 79% rename from docs/features.rst rename to docs/api_reference.rst index 97b85078..42298892 100644 --- a/docs/features.rst +++ b/docs/api_reference.rst @@ -1,27 +1,101 @@ -Features -======== +API Reference +============= -This page is for WeasyPrint |version|. See :doc:`changelog ` -for older versions. +.. currentmodule:: weasyprint + + +This page is for WeasyPrint |version|. See :doc:`changelog ` for +older versions. + + +API Stability +------------- + +Everything described here is considered “public”: this is what you can rely +on. We will try to maintain backward-compatibility, and we really often do, but +there is no hard promise. + +Anything else should not be used outside of WeasyPrint itself. We reserve +the right to change it or remove it at any point. Use it at your own risk, +or have dependency to a specific WeasyPrint version. + + +Versioning +---------- + +WeasyPrint provides frequent major releases, and minor releases with only bug +fixes. Versioning is close to what many browsers do, including Firefox and +Chrome: big major numbers, small minor numbers. + +Even if each version does not break the API, each version does break the way +documents are rendered, which is what really matters at the end. 
Providing +minor versions would give the illusion that developers can just update +WeasyPrint without checking that everything works. + +Unfortunately, we have the same problem as the other browsers: when a new +version is released, most of the user's websites are rendered exactly the same, +but a small part is not. And the only ways to know that, for web developers, +are to read the changelog and to check that their pages are correctly rendered. + +More about this choice can be found in +issue `#900`_. + +.. _#900: https://github.com/Kozea/WeasyPrint/issues/900 + + +Command-line API +---------------- + +.. autofunction:: weasyprint.__main__.main(argv=sys.argv) + + +Python API +---------- + +.. autoclass:: HTML(input, **kwargs) + :members: +.. autoclass:: CSS(input, **kwargs) +.. autoclass:: Attachment(input, **kwargs) +.. autofunction:: default_url_fetcher + +.. module:: weasyprint.document +.. autoclass:: Document + :members: +.. autoclass:: DocumentMetadata() + :members: +.. autoclass:: Page() + :members: + +.. module:: weasyprint.text.fonts +.. autoclass:: FontConfiguration() + +.. module:: weasyprint.css.counters +.. autoclass:: CounterStyle() + + +Supported Features +------------------ URLs ----- +~~~~ WeasyPrint can read normal files, HTTP, FTP and `data URIs`_. It will follow HTTP redirects but more advanced features like cookies and authentication -are currently not supported, although a custom :ref:`url fetcher -` can help. +are currently not supported, although a custom :ref:`URL fetcher +` can help. .. _data URIs: http://en.wikipedia.org/wiki/Data_URI_scheme HTML ----- +~~~~ + +Supported HTML Tags ++++++++++++++++++++ Many HTML elements are implemented in CSS through the HTML5 -`User-Agent stylesheet -`_. +`User-Agent stylesheet`_. 
Some elements need special treatment: @@ -46,11 +120,32 @@ HTML, including font ``color`` and ``size``, list attributes like ``type`` and generated by WeasyPrint is missing some of the features you expect from the HTML, try to enable this option. +.. _User-Agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css .. _presentational hints: http://www.w3.org/TR/html5/rendering.html#presentational-hints +.. _Pillow: https://python-pillow.org/ + +Stylesheet Origins +++++++++++++++++++ + +HTML documents are rendered with stylesheets from three *origins*: + +* The HTML5 `user agent stylesheet`_ (defines the default appearance + of HTML elements); +* Author stylesheets embedded in the document in ``').write_png() - check_png_pattern(png_bytes) - check_png_pattern(rotated_png_bytes, rotated=True) - check_png_pattern(empty_png_bytes, blank=True) + media_type='screen').write_pdf() tmpdir.chdir() with open(resource_filename('pattern.png'), 'rb') as pattern_fd: @@ -331,83 +324,73 @@ def test_command_line_render(tmpdir): tmpdir.join('linked.html').write_binary(linked) tmpdir.join('style.css').write_binary(css) - _run('combined.html out1.png') _run('combined.html out2.pdf') - assert tmpdir.join('out1.png').read_binary() == png_bytes - # TODO: check PDF content? How? 
- # assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes + assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes - _run('combined-UTF-16BE.html out3.png --encoding UTF-16BE') - assert tmpdir.join('out3.png').read_binary() == png_bytes + _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE') + assert tmpdir.join('out3.pdf').read_binary() == pdf_bytes - _run(tmpdir.join('combined.html').strpath + ' out4.png') - assert tmpdir.join('out4.png').read_binary() == png_bytes + _run(tmpdir.join('combined.html').strpath + ' out4.pdf') + assert tmpdir.join('out4.pdf').read_binary() == pdf_bytes - _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.png') - assert tmpdir.join('out5.png').read_binary() == png_bytes + _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf') + assert tmpdir.join('out5.pdf').read_binary() == pdf_bytes - _run('linked.html --debug out6.png') # test relative URLs - assert tmpdir.join('out6.png').read_binary() == png_bytes + _run('linked.html --debug out6.pdf') # test relative URLs + assert tmpdir.join('out6.pdf').read_binary() == pdf_bytes - _run('combined.html --verbose out7 -f png') - _run('combined.html --quiet out8 --format pdf') - assert tmpdir.join('out7').read_binary() == png_bytes - # assert tmpdir.join('out8').read_binary(), pdf_bytes + _run('combined.html --verbose out7') + _run('combined.html --quiet out8') + assert tmpdir.join('out7').read_binary() == pdf_bytes + assert tmpdir.join('out8').read_binary() == pdf_bytes - _run('no_css.html out9.png') - _run('no_css.html out10.png -s style.css') - assert tmpdir.join('out9.png').read_binary() != png_bytes - # assert tmpdir.join('out10.png').read_binary() == png_bytes + _run('no_css.html out9.pdf') + _run('no_css.html out10.pdf -s style.css') + assert tmpdir.join('out9.pdf').read_binary() != pdf_bytes + assert tmpdir.join('out10.pdf').read_binary() == pdf_bytes - stdout = _run('--format png combined.html -') - assert stdout == png_bytes + stdout = 
_run('combined.html -') + assert stdout == pdf_bytes - _run('- out11.png', stdin=combined) - check_png_pattern(tmpdir.join('out11.png').read_binary()) - assert tmpdir.join('out11.png').read_binary() == png_bytes + _run('- out11.pdf', stdin=combined) + assert tmpdir.join('out11.pdf').read_binary() == pdf_bytes - stdout = _run('--format png - -', stdin=combined) - assert stdout == png_bytes + stdout = _run('- -', stdin=combined) + assert stdout == pdf_bytes - _run('combined.html out13.png --media-type screen') - _run('combined.html out12.png -m screen') - _run('linked.html out14.png -m screen') - assert tmpdir.join('out12.png').read_binary() == rotated_png_bytes - assert tmpdir.join('out13.png').read_binary() == rotated_png_bytes - assert tmpdir.join('out14.png').read_binary() == rotated_png_bytes + _run('combined.html out13.pdf --media-type screen') + _run('combined.html out12.pdf -m screen') + _run('linked.html out14.pdf -m screen') + assert tmpdir.join('out12.pdf').read_binary() == rotated_pdf_bytes + assert tmpdir.join('out13.pdf').read_binary() == rotated_pdf_bytes + assert tmpdir.join('out14.pdf').read_binary() == rotated_pdf_bytes - stdout = _run('-f pdf combined.html -') + stdout = _run('combined.html -') assert stdout.count(b'attachment') == 0 - stdout = _run('-f pdf combined.html -') + stdout = _run('combined.html -') assert stdout.count(b'attachment') == 0 - stdout = _run('-f pdf -a pattern.png combined.html -') + stdout = _run('-a pattern.png combined.html -') assert stdout.count(b'attachment') == 1 - stdout = _run('-f pdf -a style.css -a pattern.png combined.html -') + stdout = _run('-a style.css -a pattern.png combined.html -') assert stdout.count(b'attachment') == 2 - stdout = _run('-f png -r 192 linked.html -') - assert stdout == x2_png_bytes - stdout = _run('-f png --resolution 192 linked.html -') - assert _run('linked.html - -f png --resolution 192') == x2_png_bytes - assert stdout == x2_png_bytes - os.mkdir('subdirectory') 
py.path.local('subdirectory').chdir() with capture_logs() as logs: - stdout = _run('--format png - -', stdin=combined) + stdout = _run('- -', stdin=combined) assert len(logs) == 1 assert logs[0].startswith('ERROR: Failed to load image') - assert stdout == empty_png_bytes + assert stdout.startswith(b'%PDF') with capture_logs() as logs: - stdout = _run('--format png --base-url= - -', stdin=combined) + stdout = _run('--base-url= - -', stdin=combined) assert len(logs) == 1 assert logs[0].startswith( 'ERROR: Relative URI reference without a base URI') - assert stdout == empty_png_bytes + assert stdout.startswith(b'%PDF') - stdout = _run('--format png --base-url .. - -', stdin=combined) - assert stdout == png_bytes + stdout = _run('--base-url .. - -', stdin=combined) + assert stdout == pdf_bytes with pytest.raises(SystemExit): _run('--info') @@ -415,15 +398,6 @@ def test_command_line_render(tmpdir): with pytest.raises(SystemExit): _run('--version') - with pytest.raises(SystemExit): - _run('combined.html combined.jpg') - - with pytest.raises(SystemExit): - _run('combined.html combined.pdf --resolution 100') - - with pytest.raises(SystemExit): - _run('combined.html combined.png -a pattern.png') - @assert_no_logs def test_unicode_filenames(tmpdir): @@ -837,6 +811,8 @@ def test_url_fetcher(): 'url(weasyprint-custom:foo/é_%e9_pattern)">') test('') test('') + test('') + test('') test('') with capture_logs() as logs: diff --git a/tests/test_css_validation.py b/tests/test_css_validation.py index d96bacaf..50fd428c 100644 --- a/tests/test_css_validation.py +++ b/tests/test_css_validation.py @@ -391,7 +391,7 @@ def test_expand_list_style_invalid(rule): def assert_background(css, **expected): """Helper checking the background properties.""" - expanded = expand_to_dict('background: ' + css) + expanded = expand_to_dict(f'background: {css}') assert expanded.pop('background_color') == expected.pop( 'background_color', INITIAL_VALUES['background_color']) nb_layers = 
len(expanded['background_image']) @@ -524,7 +524,7 @@ def test_expand_background_position(): """Test the ``background-position`` property.""" def position(css, *expected): [(name, [value])] = expand_to_dict( - 'background-position:' + css).items() + f'background-position: {css}').items() assert name == 'background_position' assert value == expected for css_x, val_x in [ diff --git a/tests/test_draw/test_background.py b/tests/test_draw/test_background.py index ae75c11f..02e7c956 100644 --- a/tests/test_draw/test_background.py +++ b/tests/test_draw/test_background.py @@ -15,7 +15,7 @@ from . import assert_pixels @assert_no_logs @pytest.mark.parametrize( 'name, expected_width, expected_height, expected_pixels, html', ( - ('all_blue', 10, 10, (10 * (10 * 'B' + "\n")), ''' + ('all_blue', 10, 10, (10 * (10 * 'B' + '\n')), ''' +

+ abcd efgh ijkl +

+ ''') + + +@pytest.mark.xfail +def test_max_lines_nested(): + assert_pixels('max_lines_nested', 10, 12, ''' + BBBBBBBBBB + BBBBBBBBBB + BBBBBBBBBB + BBBBBBBBBB + rrrrrrrrrr + rrrrrrrrrr + rrrrrrrrrr + rrrrrrrrrr + BBBBBBBBBB + BBBBBBBBBB + __________ + __________ + ''', ''' + +
+ aaaaa + aaaaa +
+ bbbbb + bbbbb + bbbbb + bbbbb +
+ aaaaa + aaaaa +
+ ''') + + +def test_line_clamp(): + assert_pixels('line_clamp', 10, 10, ''' + BBBB__BB__ + BBBB__BB__ + BBBB__BB__ + BBBB__BB__ + BBBBBBBBBB + BBBBBBBBBB + __________ + __________ + __________ + __________ + ''', ''' + + +

+ aa a + bb b + cc c + dddd + eeee + ffff + gggg + hhhh +

+ ''') + + +@pytest.mark.xfail +def test_ellipsis_nested(): + assert_pixels('ellipsis_nested', 10, 10, ''' + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBB____ + BBBBBBBB__ + BBBBBBBB__ + ''', ''' + +
+

aaa

+

aaa

+

aaa

+

aaa

+

aaa

+

aaa

+
+ ''') + + +def test_text_align_right(): + assert_pixels('text_align_right', 9, 6, ''' + _________ + __RR__RR_ + __RR__RR_ + ______RR_ + ______RR_ + _________ + ''', ''' + +
a c e
''') + + +def test_text_align_justify(): + assert_pixels('text_align_justify', 9, 6, ''' + _________ + _RR___RR_ + _RR___RR_ + _RR______ + _RR______ + _________ + ''', ''' + +
a c e
''') + + +def test_text_word_spacing(): + assert_pixels('text_word_spacing', 19, 4, ''' + ___________________ + _RR____RR____RR____ + _RR____RR____RR____ + ___________________ + ''', ''' + +
a c e
''') + + +def test_text_letter_spacing(): + assert_pixels('text_letter_spacing', 19, 4, ''' + ___________________ + _RR____RR____RR____ + _RR____RR____RR____ + ___________________ + ''', ''' + +
ace
''') + + +def test_text_underline(): + assert_pixels('text_underline', 13, 7, ''' + _____________ + _zzzzzzzzzzz_ + _zRRRRRRRRRz_ + _zRRRRRRRRRz_ + _zBBBBBBBBBz_ + _zzzzzzzzzzz_ + _____________ + ''', ''' + +
abc
''') + + +def test_text_overline(): + # Ascent value seems to be a bit random, don’t try to get the exact + # position of the line + assert_pixels('text_overline', 13, 7, ''' + _____________ + _zzzzzzzzzzz_ + _zzzzzzzzzzz_ + _zRRRRRRRRRz_ + _zRRRRRRRRRz_ + _zzzzzzzzzzz_ + _____________ + ''', ''' + +
abc
''') + + +def test_text_line_through(): + assert_pixels('text_line_through', 13, 7, ''' + _____________ + _zzzzzzzzzzz_ + _zRRRRRRRRRz_ + _zBBBBBBBBBz_ + _zRRRRRRRRRz_ + _zzzzzzzzzzz_ + _____________ + ''', ''' + +
abc
''') diff --git a/tests/test_layout/test_block.py b/tests/test_layout/test_block.py index 253f14cb..8c35417d 100644 --- a/tests/test_layout/test_block.py +++ b/tests/test_layout/test_block.py @@ -775,3 +775,62 @@ def test_box_margin_top_repagination(): div, h1 = body.children assert div.margin_top == 0 assert div.padding_box_y() == 0 + + +@assert_no_logs +def test_continue_discard(): + page_1, = parse(''' + +
+
a
+
b
+
c
+
d
+
e
+
f
+
''') + html, = page_1.children + body, = html.children + article, = body.children + assert article.height == 3 * 25 + div_1, div_2, div_3 = article.children + assert div_1.position_y == 1 + assert div_2.position_y == 1 + 25 + assert div_3.position_y == 1 + 25 * 2 + assert article.border_bottom_width == 1 + + +@assert_no_logs +def test_continue_discard_children(): + page_1, = parse(''' + +
+
+
a
+
b
+
c
+
d
+
e
+
f
+
+
''') + html, = page_1.children + body, = html.children + article, = body.children + assert article.height == 2 + 3 * 25 + section, = article.children + assert section.height == 3 * 25 + div_1, div_2, div_3 = section.children + assert div_1.position_y == 2 + assert div_2.position_y == 2 + 25 + assert div_3.position_y == 2 + 25 * 2 + assert article.border_bottom_width == 1 diff --git a/tests/test_text.py b/tests/test_text.py index 486e230a..e62aab12 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -8,7 +8,7 @@ import pytest from weasyprint.css.properties import INITIAL_VALUES -from weasyprint.text import split_first_line +from weasyprint.text.line_break import split_first_line from .test_boxes import render_pages from .testing_utils import MONO_FONTS, SANS_FONTS, assert_no_logs @@ -410,7 +410,7 @@ def test_letter_spacing_1(): '' @@ -1073,3 +1073,59 @@ def test_leader_content(leader, content): after, = line.children inline, = after.children assert inline.children[0].text == content + + +@pytest.mark.xfail +@assert_no_logs +def test_max_lines(): + page, = render_pages(''' + +

+ abcd efgh ijkl +

+ ''') + html, = page.children + body, = html.children + p1, p2 = body.children + line1, line2 = p1.children + line3, = p2.children + text1, = line1.children + text2, = line2.children + text3, = line3.children + assert text1.text == 'abcd' + assert text2.text == 'efgh' + assert text3.text == 'ijkl' + + +@assert_no_logs +def test_continue(): + page, = render_pages(''' + +
+ abcd efgh ijkl +
+ ''') + html, = page.children + body, = html.children + p, = body.children + line1, line2 = p.children + text1, = line1.children + text2, = line2.children + assert text1.text == 'abcd' + assert text2.text == 'efgh' \ No newline at end of file diff --git a/weasyprint/VERSION b/weasyprint/VERSION deleted file mode 100644 index 0bf307b9..00000000 --- a/weasyprint/VERSION +++ /dev/null @@ -1 +0,0 @@ -52.2 diff --git a/weasyprint/__init__.py b/weasyprint/__init__.py index 974ae3e4..9e029b53 100644 --- a/weasyprint/__init__.py +++ b/weasyprint/__init__.py @@ -30,10 +30,11 @@ if hasattr(sys, 'frozen'): # pragma: no cover else: ROOT = Path(os.path.dirname(__file__)) -VERSION = __version__ = (ROOT / 'VERSION').read_text().strip() +VERSION = __version__ = '53.0' -__all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page', - 'default_url_fetcher', 'VERSION'] +__all__ = [ + 'HTML', 'CSS', 'Attachment', 'Document', 'Page', 'default_url_fetcher', + 'VERSION', '__version__'] # Import after setting the version, as the version is used in other modules @@ -57,12 +58,10 @@ class HTML: :type filename: str or pathlib.Path :param filename: A filename, relative to the current directory, or absolute. - :type url: str - :param url: An absolute, fully qualified URL. + :param str url: An absolute, fully qualified URL. :type file_obj: :term:`file object` :param file_obj: Any object with a ``read`` method. - :type string: str - :param string: A string of HTML source. + :param str string: A string of HTML source. Specifying multiple inputs is an error: ``HTML(filename="foo.html", url="localhost://bar.html")`` @@ -70,20 +69,17 @@ class HTML: You can also pass optional named arguments: - :type encoding: str - :param encoding: Force the source character encoding. - :type base_url: str - :param base_url: The base used to resolve relative URLs + :param str encoding: Force the source character encoding. + :param str base_url: The base used to resolve relative URLs (e.g. in ````). 
If not provided, try to use the input filename, URL, or ``name`` attribute of :term:`file objects `. - :type url_fetcher: function + :type url_fetcher: :term:`function` :param url_fetcher: A function or other callable with the same signature as :func:`default_url_fetcher` called to fetch external resources such as stylesheets and images. - (See :ref:`url-fetchers`.) - :type media_type: str - :param media_type: The media type to use for ``@media``. + (See :ref:`URL Fetchers`.) + :param str media_type: The media type to use for ``@media``. Defaults to ``'print'``. **Note:** In some cases like ``HTML(string=foo)`` relative URLs will be invalid if ``base_url`` is not provided. @@ -128,29 +124,26 @@ class HTML: """Lay out and paginate the document, but do not (yet) export it to PDF or PNG. - This returns a :class:`~document.Document` object which provides + This returns a :class:`document.Document` object which provides access to individual pages and various meta-data. See :meth:`write_pdf` to get a PDF directly. .. versionadded:: 0.15 - :type stylesheets: list - :param stylesheets: + :param list stylesheets: An optional list of user stylesheets. List elements are :class:`CSS` objects, filenames, URLs, or file - objects. (See :ref:`stylesheet-origins`.) - :type presentational_hints: bool - :param presentational_hints: Whether HTML presentational hints are - followed. - :type optimize_images: bool - :param optimize_images: Try to optimize the size of embedded images. - :type font_config: :class:`~fonts.FontConfiguration` + objects. (See :ref:`Stylesheet Origins`.) + :param bool presentational_hints: + Whether HTML presentational hints are followed. + :param bool optimize_images: + Try to optimize the size of embedded images. + :type font_config: :class:`text.fonts.FontConfiguration` :param font_config: A font configuration handling ``@font-face`` rules. 
- :type counter_style: :class:`~css.counters.CounterStyle` + :type counter_style: :class:`css.counters.CounterStyle` :param counter_style: A dictionary storing ``@counter-style`` rules. - :type image_cache: dict - :param image_cache: A dictionary used to cache images. - :returns: A :class:`~document.Document` object. + :param dict image_cache: A dictionary used to cache images. + :returns: A :class:`document.Document` object. """ return Document._render( @@ -166,36 +159,32 @@ class HTML: This is a shortcut for calling :meth:`render`, then :meth:`Document.write_pdf() `. - :type target: str, pathlib.Path or file object + :type target: + :class:`str`, :class:`pathlib.Path` or :term:`file object` :param target: A filename where the PDF file is generated, a file object, or :obj:`None`. - :type stylesheets: list - :param stylesheets: + :param list stylesheets: An optional list of user stylesheets. The list's elements are :class:`CSS` objects, filenames, URLs, or file-like - objects. (See :ref:`stylesheet-origins`.) - :type zoom: float - :param zoom: + objects. (See :ref:`Stylesheet Origins`.) + :param float zoom: The zoom factor in PDF units per CSS units. **Warning**: All CSS units are affected, including physical units like ``cm`` and named sizes like ``A4``. For values other than 1, the physical CSS units will thus be "wrong". - :type attachments: list - :param attachments: A list of additional file attachments for the + :param list attachments: A list of additional file attachments for the generated PDF document or :obj:`None`. The list's elements are :class:`Attachment` objects, filenames, URLs or file-like objects. - :type presentational_hints: bool - :param presentational_hints: Whether HTML presentational hints are + :param bool presentational_hints: Whether HTML presentational hints are followed. - :type optimize_images: bool - :param optimize_images: Try to optimize the size of embedded images. 
- :type font_config: :class:`~fonts.FontConfiguration` + :param bool optimize_images: + Try to optimize the size of embedded images. + :type font_config: :class:`text.fonts.FontConfiguration` :param font_config: A font configuration handling ``@font-face`` rules. - :type counter_style: :class:`~css.counters.CounterStyle` + :type counter_style: :class:`css.counters.CounterStyle` :param counter_style: A dictionary storing ``@counter-style`` rules. - :type image_cache: dict - :param image_cache: A dictionary used to cache images. + :param dict image_cache: A dictionary used to cache images. :returns: The PDF as :obj:`bytes` if ``target`` is not provided or :obj:`None`, otherwise :obj:`None` (the PDF is written to @@ -217,11 +206,11 @@ class CSS: arguments. An additional argument called ``font_config`` must be provided to handle - ``@font-config`` rules. The same ``fonts.FontConfiguration`` object must be - used for different ``CSS`` objects applied to the same document. + ``@font-config`` rules. The same ``text.fonts.FontConfiguration`` object + must be used for different ``CSS`` objects applied to the same document. ``CSS`` objects have no public attributes or methods. They are only meant - to be used in the :meth:`~HTML.write_pdf` and :meth:`~HTML.render` methods + to be used in the :meth:`HTML.write_pdf` and :meth:`HTML.render` methods of :class:`HTML` objects. """ diff --git a/weasyprint/__main__.py b/weasyprint/__main__.py index 5dd3e0f4..bab3e1ff 100644 --- a/weasyprint/__main__.py +++ b/weasyprint/__main__.py @@ -14,7 +14,7 @@ import sys import pydyf from . import HTML, LOGGER, __version__ -from .text import pango +from .text.ffi import pango class PrintInfo(argparse.Action): @@ -57,7 +57,7 @@ def main(argv=None, stdout=None, stdin=None): .. option:: -s , --stylesheet Filename or URL of a user cascading stylesheet (see - :ref:`stylesheet-origins`) to add to the document + :ref:`Stylesheet Origins`) to add to the document (e.g. ``-s print.css``). 
Multiple stylesheets are allowed. .. option:: -m , --media-type @@ -120,17 +120,11 @@ def main(argv=None, stdout=None, stdin=None): help='Print system information and exit.') parser.add_argument('-e', '--encoding', help='Character encoding of the input') - parser.add_argument('-f', '--format', choices=['pdf', 'png'], - help='Output format. Can be omitted if `output` ' - 'ends with a .pdf or .png extension.') parser.add_argument('-s', '--stylesheet', action='append', help='URL or filename for a user CSS stylesheet. ' 'May be given multiple times.') parser.add_argument('-m', '--media-type', default='print', help='Media type to use for @media, defaults to print') - parser.add_argument('-r', '--resolution', type=float, - help='PNG only: the resolution in pixel per CSS inch. ' - 'Defaults to 96, one PNG pixel per CSS pixel.') parser.add_argument('-u', '--base-url', help='Base for relative URLs in the HTML input. ' "Defaults to the input's own filename or URL " @@ -155,19 +149,6 @@ def main(argv=None, stdout=None, stdin=None): args = parser.parse_args(argv) - if args.format is None: - output_lower = args.output.lower() - if output_lower.endswith('.pdf'): - format_ = 'pdf' - elif output_lower.endswith('.png'): - format_ = 'png' - else: - parser.error( - 'Either specify a format with -f or choose an ' - 'output filename that ends in .pdf or .png') - else: - format_ = args.format.lower() - if args.input == '-': source = stdin or sys.stdin.buffer if args.base_url is None: @@ -185,18 +166,8 @@ def main(argv=None, stdout=None, stdin=None): kwargs = { 'stylesheets': args.stylesheet, 'presentational_hints': args.presentational_hints, - 'optimize_images': args.optimize_images} - if args.resolution: - if format_ == 'png': - kwargs['resolution'] = args.resolution - else: - parser.error('--resolution only applies for the PNG format.') - - if args.attachment: - if format_ == 'pdf': - kwargs['attachments'] = args.attachment - else: - parser.error('--attachment only applies for the PDF 
format.') + 'optimize_images': args.optimize_images, + 'attachments': args.attachment} # Default to logging to stderr. if args.debug: @@ -210,7 +181,7 @@ def main(argv=None, stdout=None, stdin=None): html = HTML(source, base_url=args.base_url, encoding=args.encoding, media_type=args.media_type) - getattr(html, 'write_' + format_)(output, **kwargs) + html.write_pdf(output, **kwargs) if __name__ == '__main__': # pragma: no cover diff --git a/weasyprint/css/__init__.py b/weasyprint/css/__init__.py index 00b0eae0..43c41a20 100644 --- a/weasyprint/css/__init__.py +++ b/weasyprint/css/__init__.py @@ -26,7 +26,7 @@ from ..logger import LOGGER, PROGRESS_LOGGER from ..urls import URLFetchingError, get_url_attribute, url_join from . import computed_values, counters, media_queries from .properties import INHERITED, INITIAL_NOT_COMPUTED, INITIAL_VALUES -from .utils import remove_whitespace +from .utils import get_url, remove_whitespace from .validation import preprocess_declarations from .validation.descriptors import preprocess_descriptors @@ -127,12 +127,12 @@ class StyleFor: style['border_collapse'] == 'collapse'): # Padding do not apply for side in ['top', 'bottom', 'left', 'right']: - style['padding_' + side] = computed_values.ZERO_PIXELS + style[f'padding_{side}'] = computed_values.ZERO_PIXELS if (style['display'].startswith('table-') and style['display'] != 'table-caption'): # Margins do not apply for side in ['top', 'bottom', 'left', 'right']: - style['margin_' + side] = computed_values.ZERO_PIXELS + style[f'margin_{side}'] = computed_values.ZERO_PIXELS return style @@ -830,9 +830,18 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules, continue tokens = remove_whitespace(rule.prelude) - if tokens and tokens[0].type in ('url', 'string'): - url = tokens[0].value - else: + url = None + if tokens: + if tokens[0].type == 'string': + url = url_join( + base_url, tokens[0].value, allow_relative=False, + context='@import at %s:%s', + 
context_args=(rule.source_line, rule.source_column)) + else: + url_tuple = get_url(tokens[0], base_url) + if url_tuple and url_tuple[1][0] == 'external': + url = url_tuple[1][1] + if url is None: continue media = media_queries.parse_media_query(tokens[1:]) if media is None: @@ -845,10 +854,6 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules, if not media_queries.evaluate_media_query( media, device_media_type): continue - url = url_join( - base_url, url, allow_relative=False, - context='@import at %d:%d', - context_args=(rule.source_line, rule.source_column)) if url is not None: try: CSS( @@ -910,7 +915,7 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules, tinycss2.parse_declaration_list(margin_rule.content))) if declarations: selector_list = [( - specificity, '@' + margin_rule.lower_at_keyword, + specificity, f'@{margin_rule.lower_at_keyword}', page_type)] page_rules.append( (margin_rule, selector_list, declarations)) diff --git a/weasyprint/css/computed_values.py b/weasyprint/css/computed_values.py index 50ab2ad8..28596b71 100644 --- a/weasyprint/css/computed_values.py +++ b/weasyprint/css/computed_values.py @@ -12,8 +12,9 @@ from urllib.parse import unquote from tinycss2.color3 import parse_color -from .. 
import text from ..logger import LOGGER +from ..text.ffi import ffi, pango, units_to_double +from ..text.line_break import Layout, first_line_metrics, line_size from ..urls import get_link_attribute from .properties import ( INHERITED, INITIAL_NOT_COMPUTED, INITIAL_VALUES, Dimension) @@ -378,12 +379,12 @@ def length(computer, name, value, font_size=None, pixels_only=False): elif unit == 'ch': # TODO: cache # TODO: use context to use @font-face fonts - layout = text.Layout( + layout = Layout( context=None, font_size=font_size, style=computer['computed']) layout.set_text('0') line, _ = layout.get_first_line() - logical_width, _ = text.get_size(line, computer['computed']) + logical_width, _ = line_size(line, computer['computed']) result = value.value * logical_width elif unit == 'em': result = value.value * font_size @@ -772,10 +773,10 @@ def strut_layout(style, context=None): if key in context.strut_layouts: return context.strut_layouts[key] - layout = text.Layout(context, style['font_size'], style) + layout = Layout(context, style['font_size'], style) layout.set_text(' ') line, _ = layout.get_first_line() - _, _, _, _, text_height, baseline = text.first_line_metrics( + _, _, _, _, text_height, baseline = first_line_metrics( line, '', layout, resume_at=None, space_collapse=False, style=style) if style['line_height'] == 'normal': result = text_height, baseline @@ -795,10 +796,15 @@ def ex_ratio(style): """Return the ratio 1ex/font_size, according to given style.""" font_size = 1000 # big value # TODO: use context to use @font-face fonts - layout = text.Layout(context=None, font_size=font_size, style=style) + layout = Layout(context=None, font_size=font_size, style=style) layout.set_text('x') line, _ = layout.get_first_line() - _, ink_height_above_baseline = text.get_ink_position(line) + + ink_extents = ffi.new('PangoRectangle *') + pango.pango_layout_line_get_extents(line, ink_extents, ffi.NULL) + height_above_baseline = units_to_double(ink_extents.y) + 
ffi.release(ink_extents) + # Zero means some kind of failure, fallback is 0.5. # We round to try keeping exact values that were altered by Pango. - return round(-ink_height_above_baseline / font_size, 5) or 0.5 + return round(-height_above_baseline / font_size, 5) or 0.5 diff --git a/weasyprint/css/counters.py b/weasyprint/css/counters.py index 18f9d836..54f5d37a 100644 --- a/weasyprint/css/counters.py +++ b/weasyprint/css/counters.py @@ -41,8 +41,8 @@ class CounterStyle(dict): .. versionadded:: 0.52 - Keep a list of counter styles defined by @counter-style rules, indexed by - their names. + Keep a list of counter styles defined by ``@counter-style`` rules, indexed + by their names. See https://www.w3.org/TR/css-counter-styles-3/. diff --git a/weasyprint/css/properties.py b/weasyprint/css/properties.py index 19af719b..c529b2bd 100644 --- a/weasyprint/css/properties.py +++ b/weasyprint/css/properties.py @@ -184,6 +184,9 @@ INITIAL_VALUES = { 'text_decoration_style': 'solid', # Overflow Module 3 (WD): https://www.w3.org/TR/css-overflow-3/ + 'block_ellipsis': 'none', + 'continue': 'auto', + 'max_lines': 'none', 'overflow': 'visible', 'text_overflow': 'clip', @@ -218,6 +221,7 @@ KNOWN_PROPERTIES = set(name.replace('_', '-') for name in INITIAL_VALUES) # link: click events normally bubble up to link ancestors # See http://lists.w3.org/Archives/Public/www-style/2012Jun/0315.html INHERITED = { + 'block_ellipsis', 'border_collapse', 'border_spacing', 'caption_side', diff --git a/weasyprint/css/utils.py b/weasyprint/css/utils.py index 073e5a71..d63fe685 100644 --- a/weasyprint/css/utils.py +++ b/weasyprint/css/utils.py @@ -376,7 +376,7 @@ def parse_function(function_token): space-separated arguments. Return ``None`` otherwise. 
""" - if not getattr(function_token, 'type', None) == 'function': + if function_token.type != 'function': return content = list(remove_whitespace(function_token.arguments)) diff --git a/weasyprint/css/validation/descriptors.py b/weasyprint/css/validation/descriptors.py index 4a710cc3..266134de 100644 --- a/weasyprint/css/validation/descriptors.py +++ b/weasyprint/css/validation/descriptors.py @@ -202,7 +202,7 @@ def font_variant(tokens): for name, sub_tokens in expand_font_variant(tokens): try: values.append(properties.validate_non_shorthand( - None, 'font-variant' + name, sub_tokens, required=True)) + None, f'font-variant{name}', sub_tokens, required=True)) except InvalidValues: return None return values diff --git a/weasyprint/css/validation/expanders.py b/weasyprint/css/validation/expanders.py index 7dd88f3c..dd4e2fcd 100644 --- a/weasyprint/css/validation/expanders.py +++ b/weasyprint/css/validation/expanders.py @@ -16,11 +16,12 @@ from ..utils import ( from .descriptors import expand_font_variant from .properties import ( background_attachment, background_image, background_position, - background_repeat, background_size, border_style, border_width, box, - column_count, column_width, flex_basis, flex_direction, flex_grow_shrink, - flex_wrap, font_family, font_size, font_stretch, font_style, font_weight, - line_height, list_style_image, list_style_position, list_style_type, - other_colors, overflow_wrap, validate_non_shorthand) + background_repeat, background_size, block_ellipsis, border_style, + border_width, box, column_count, column_width, flex_basis, flex_direction, + flex_grow_shrink, flex_wrap, font_family, font_size, font_stretch, + font_style, font_weight, line_height, list_style_image, + list_style_position, list_style_type, other_colors, overflow_wrap, + validate_non_shorthand) EXPANDERS = {} @@ -273,7 +274,7 @@ def expand_background(base_url, name, tokens): def add(name, value): if value is None: return False - name = 'background_' + name + name = 
f'background_{name}' if name in results: raise InvalidValues results[name] = value @@ -607,3 +608,26 @@ def expand_flex_flow(base_url, name, tokens): raise InvalidValues else: raise InvalidValues + + +@expander('line-clamp') +def expand_line_clamp(base_url, name, tokens): + """Expand the ``line-clamp`` property.""" + if len(tokens) == 1: + keyword = get_single_keyword(tokens) + if keyword == 'none': + yield 'max_lines', 'none' + yield 'continue', 'auto' + yield 'block-ellipsis', 'none' + elif tokens[0].type == 'number' and tokens[0].int_value is not None: + yield 'max_lines', tokens[0].int_value + yield 'continue', 'discard' + yield 'block-ellipsis', 'auto' + elif len(tokens) == 2: + if tokens[0].type == 'number': + max_lines = tokens[0].int_value + ellipsis = block_ellipsis([tokens[1]]) + if max_lines and ellipsis is not None: + yield 'max_lines', tokens[0].value + yield 'continue', 'discard' + yield 'block-ellipsis', ellipsis diff --git a/weasyprint/css/validation/properties.py b/weasyprint/css/validation/properties.py index d3aadd88..5437a33e 100644 --- a/weasyprint/css/validation/properties.py +++ b/weasyprint/css/validation/properties.py @@ -326,6 +326,36 @@ def box_decoration_break(keyword): return keyword in ('slice', 'clone') +@property() +@single_token +def block_ellipsis(token): + """``box-ellipsis`` property validation.""" + if token.type == 'string': + return ('string', token.value) + else: + keyword = get_keyword(token) + if keyword in ('none', 'auto'): + return keyword + + +@property('continue', unstable=True) +@single_keyword +def continue_(keyword): + """``continue`` property validation.""" + return keyword in ('auto', 'discard') + + +@property(unstable=True) +@single_token +def max_lines(token): + if token.type == 'number' and token.int_value is not None: + if token.int_value >= 1: + return token.int_value + keyword = get_keyword(token) + if keyword == 'none': + return keyword + + @property(unstable=True) @single_keyword def margin_break(keyword): 
@@ -518,7 +548,7 @@ def counter(tokens, default_integer): return # expected a keyword here counter_name = token.value if counter_name in ('none', 'initial', 'inherit'): - raise InvalidValues('Invalid counter name: ' + counter_name) + raise InvalidValues(f'Invalid counter name: {counter_name}') token = next(tokens, None) if token is not None and ( token.type == 'number' and token.int_value is not None): @@ -1237,10 +1267,9 @@ def anchor(token): function = parse_function(token) if function: name, args = function - prototype = (name, [a.type for a in args]) - args = [getattr(a, 'value', a) for a in args] + prototype = (name, [arg.type for arg in args]) if prototype == ('attr', ['ident']): - return ('attr()', args[0]) + return ('attr()', args[0].value) @property(proprietary=True, wants_base_url=True) @@ -1255,10 +1284,9 @@ def link(token, base_url): function = parse_function(token) if function: name, args = function - prototype = (name, [a.type for a in args]) - args = [getattr(a, 'value', a) for a in args] + prototype = (name, [arg.type for arg in args]) if prototype == ('attr', ['ident']): - return ('attr()', args[0]) + return ('attr()', args[0].value) @property() @@ -1352,10 +1380,9 @@ def lang(token): function = parse_function(token) if function: name, args = function - prototype = (name, [a.type for a in args]) - args = [getattr(a, 'value', a) for a in args] + prototype = (name, [arg.type for arg in args]) if prototype == ('attr', ['ident']): - return ('attr()', args[0]) + return ('attr()', args[0].value) elif token.type == 'string': return ('string', token.value) diff --git a/weasyprint/document.py b/weasyprint/document.py index 349cc521..807afd54 100644 --- a/weasyprint/document.py +++ b/weasyprint/document.py @@ -23,7 +23,6 @@ from .css import get_all_computed_styles from .css.counters import CounterStyle from .css.targets import TargetCollector from .draw import draw_page, stacked -from .fonts import FontConfiguration from .formatting_structure import boxes 
from .formatting_structure.build import build_formatting_structure from .html import W3C_DATE_RE, get_html_metadata @@ -31,7 +30,8 @@ from .images import get_image_from_uri as original_get_image_from_uri from .layout import LayoutContext, layout_document from .layout.percentages import percentage from .logger import LOGGER, PROGRESS_LOGGER -from .text import ffi, pango +from .text.ffi import ffi, pango +from .text.fonts import FontConfiguration from .urls import URLFetchingError @@ -68,22 +68,24 @@ def _w3c_date_to_pdf(string, attr_name): class Font: def __init__(self, file_content, pango_font): pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL) - font_description = pango.pango_font_describe(pango_font) - font_family = ffi.string(pango.pango_font_description_get_family( - font_description)) - font_size = pango.pango_font_description_get_size(font_description) + self._font_description = pango.pango_font_describe(pango_font) + self.family = ffi.string(pango.pango_font_description_get_family( + self._font_description)) + font_size = pango.pango_font_description_get_size( + self._font_description) + description_string = ffi.string( + pango.pango_font_description_to_string(self._font_description)) sha = hashlib.sha256() - sha.update(file_content) + sha.update(description_string) self.file_content = file_content + self.file_hash = hash(file_content) self.hash = ''.join( chr(65 + letter % 26) for letter in sha.digest()[:6]) self.name = ( b'/' + self.hash.encode('ascii') + b'+' + - font_family.replace(b' ', b'')) - self.family = font_family - self.flags = 4 - self.italic_angle = 0 + self.family.replace(b' ', b'')) + self.italic_angle = 0 # TODO: this should be different self.ascent = int( pango.pango_font_metrics_get_ascent(pango_metrics) / font_size * 1000) @@ -96,6 +98,18 @@ class Font: self.widths = {} self.cmap = {} + @property + def flags(self): + flags = 2 ** 3 # Symbolic, custom character set + if 
pango.pango_font_description_get_style(self._font_description): + flags += 2 ** 7 # Italic + if b'Serif' in self.family.split(): + flags += 2 ** 2 # Serif + widths = self.widths.values() + if len(widths) > 1 and len(set(widths)) == 1: + flags += 2 ** 1 # FixedPitch + return flags + class Context(pydyf.Stream): """PDF stream object with context storing alpha states.""" @@ -635,8 +649,6 @@ class Page: has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox)) # In case of duplicate IDs, only the first is an anchor. has_anchor = anchor_name and anchor_name not in self.anchors - is_attachment = getattr(box, 'is_attachment', False) - download_name = getattr(box, 'attachment_download', None) if has_bookmark or has_link or has_anchor: pos_x, pos_y, width, height = box.hit_area() @@ -645,18 +657,18 @@ class Page: assert token_type == 'url' link_type, target = link assert isinstance(target, str) - if link_type == 'external' and is_attachment: + if link_type == 'external' and box.is_attachment: link_type = 'attachment' if matrix: link = ( link_type, target, rectangle_aabb(matrix, pos_x, pos_y, width, height), - download_name) + box.download_name) else: link = ( link_type, target, (pos_x, pos_y, pos_x + width, pos_y + height), - download_name) + box.download_name) self.links.append(link) if matrix and (has_bookmark or has_anchor): pos_x, pos_y = matrix.transform_point(pos_x, pos_y) @@ -672,20 +684,16 @@ class Page: def paint(self, context, left_x=0, top_y=0, scale=1, clip=False): """Paint the page into the PDF file. - :type context: :class:`pdf.Context` + :type context: ``Context`` :param context: A context object. - :type left_x: float - :param left_x: + :param float left_x: X coordinate of the left of the page, in PDF points. - :type top_y: float - :param top_y: + :param float top_y: Y coordinate of the top of the page, in PDF points. - :type scale: float - :param scale: + :param float scale: Zoom scale. 
- :type clip: bool - :param clip: + :param bool clip: Whether to clip/cut content outside the page. If false or not provided, content can overflow. @@ -763,7 +771,7 @@ class Document: can also be instantiated directly with a list of :class:`pages `, a set of :class:`metadata `, a :func:`url_fetcher ` function, and a :class:`font_config - `. + `. """ @@ -869,11 +877,11 @@ class Document: #: but to the whole document. self.metadata = metadata #: A function or other callable with the same signature as - #: :func:`default_url_fetcher` called to fetch external resources such - #: as stylesheets and images. (See :ref:`url-fetchers`.) + #: :func:`weasyprint.default_url_fetcher` called to fetch external + #: resources such as stylesheets and images. (See :ref:`URL Fetchers`.) self.url_fetcher = url_fetcher #: A :obj:`dict` of fonts used by the document. Keys are hashes used to - #: identify fonts, values are :class:`Font` objects. + #: identify fonts, values are ``Font`` objects. self.fonts = {} # Keep a reference to font_config to avoid its garbage collection until # rendering is destroyed. This is needed as font_config.__del__ removes @@ -917,22 +925,21 @@ class Document: def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None): """Paint the pages in a PDF file, with metadata. - :type target: str, pathlib.Path or file object + :type target: + :class:`str`, :class:`pathlib.Path` or :term:`file object` :param target: A filename where the PDF file is generated, a file object, or :obj:`None`. - :type zoom: float - :param zoom: + :param float zoom: The zoom factor in PDF units per CSS units. **Warning**: All CSS units are affected, including physical units like ``cm`` and named sizes like ``A4``. For values other than 1, the physical CSS units will thus be "wrong". 
- :type attachments: list - :param attachments: A list of additional file attachments for the + :param list attachments: A list of additional file attachments for the generated PDF document or :obj:`None`. The list's elements are - :class:`Attachment` objects, filenames, URLs or file-like objects. + ``Attachment`` objects, filenames, URLs or file-like objects. :param finisher: A finisher function, that accepts the document and a - ``pydyf.PDF`` object as parameters, can be passed to perform + :class:`pydyf.PDF` object as parameters, can be passed to perform post-processing on the PDF right before the trailer is written. :returns: The PDF as :obj:`bytes` if ``target`` is not provided or @@ -1162,22 +1169,32 @@ class Document: pdf.catalog['Names']['EmbeddedFiles'] = content.reference # Embeded fonts - fonts = pydyf.Dictionary() + pdf_fonts = pydyf.Dictionary() + fonts_by_file_hash = {} for font in self.fonts.values(): + if font.file_hash in fonts_by_file_hash: + fonts_by_file_hash[font.file_hash].append(font) + else: + fonts_by_file_hash[font.file_hash] = [font] + font_references_by_file_hash = {} + for file_hash, fonts in fonts_by_file_hash.items(): # Optimize font + cmap = {} + for font in fonts: + cmap = {**cmap, **font.cmap} + full_font = io.BytesIO(fonts[0].file_content) + optimized_font = io.BytesIO() try: - full_font = io.BytesIO(font.file_content) - optimized_font = io.BytesIO() ttfont = TTFont(full_font) options = subset.Options( retain_gids=True, passthrough_tables=True) subsetter = subset.Subsetter(options) - subsetter.populate(gids=font.cmap) + subsetter.populate(gids=cmap) subsetter.subset(ttfont) ttfont.save(optimized_font) content = optimized_font.getvalue() except TTLibError: - content = font.file_content + content = fonts[0].file_content # Include font font_type = 'otf' if content[:4] == b'OTTO' else 'ttf' @@ -1187,7 +1204,9 @@ class Document: font_extra = pydyf.Dictionary({'Length1': len(content)}) font_stream = pydyf.Stream([content], font_extra, 
compress=True) pdf.add_object(font_stream) + font_references_by_file_hash[file_hash] = font_stream.reference + for font in self.fonts.values(): widths = pydyf.Array() for i in sorted(font.widths): if i - 1 not in font.widths: @@ -1199,7 +1218,7 @@ class Document: 'Type': '/FontDescriptor', 'FontName': font.name, 'FontFamily': pydyf.String(font.family), - 'Flags': 32, + 'Flags': font.flags, 'FontBBox': pydyf.Array(font.bbox), 'ItalicAngle': font.italic_angle, 'Ascent': font.ascent, @@ -1208,7 +1227,7 @@ class Document: 'StemV': font.stemv, 'StemH': font.stemh, (f'FontFile{"3" if font_type == "otf" else "2"}'): - font_stream.reference, + font_references_by_file_hash[font.file_hash], }) if font_type == 'otf': font_descriptor['Subtype'] = '/OpenType' @@ -1262,10 +1281,10 @@ class Document: 'ToUnicode': to_unicode.reference, }) pdf.add_object(font_dictionary) - fonts[font.hash] = font_dictionary.reference + pdf_fonts[font.hash] = font_dictionary.reference - pdf.add_object(fonts) - resources['Font'] = fonts.reference + pdf.add_object(pdf_fonts) + resources['Font'] = pdf_fonts.reference self._use_references(pdf, resources) # Anchors diff --git a/weasyprint/draw.py b/weasyprint/draw.py index c196d2d7..c24c3fa1 100644 --- a/weasyprint/draw.py +++ b/weasyprint/draw.py @@ -14,7 +14,8 @@ from .formatting_structure import boxes from .layout import replaced from .layout.backgrounds import BackgroundLayer from .stacking import StackingContext -from .text import show_first_line +from .text.ffi import ffi, harfbuzz, pango, units_from_double, units_to_double +from .text.line_break import get_last_word_end SIDES = ('top', 'right', 'bottom', 'left') CROP = ''' @@ -991,7 +992,8 @@ def draw_replacedbox(context, box): context, draw_width, draw_height, box.style['image_rendering']) -def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'): +def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip', + block_ellipsis='none'): if isinstance(box, 
StackingContext): stacking_context = box assert isinstance( @@ -1003,8 +1005,13 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'): if isinstance(box, (boxes.InlineBox, boxes.LineBox)): if isinstance(box, boxes.LineBox): text_overflow = box.text_overflow + block_ellipsis = box.block_ellipsis in_text = False - for child in box.children: + ellipsis = 'none' + for i, child in enumerate(box.children): + if i == len(box.children) - 1: + # Last child + ellipsis = block_ellipsis if isinstance(child, StackingContext): child_offset_x = offset_x else: @@ -1014,13 +1021,16 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'): if not in_text: context.begin_text() in_text = True - draw_text(context, child, child_offset_x, text_overflow) + draw_text( + context, child, child_offset_x, text_overflow, + ellipsis) else: if in_text: in_text = False context.end_text() draw_inline_level( - context, page, child, child_offset_x, text_overflow) + context, page, child, child_offset_x, text_overflow, + ellipsis) if in_text: context.end_text() elif isinstance(box, boxes.InlineReplacedBox): @@ -1033,7 +1043,7 @@ def draw_inline_level(context, page, box, offset_x=0, text_overflow='clip'): context.end_text() -def draw_text(context, textbox, offset_x, text_overflow): +def draw_text(context, textbox, offset_x, text_overflow, block_ellipsis): """Draw a textbox to a pydyf stream.""" # Pango crashes with font-size: 0 assert textbox.style['font_size'] @@ -1046,39 +1056,180 @@ def draw_text(context, textbox, offset_x, text_overflow): context.set_alpha(textbox.style['color'][3]) textbox.pango_layout.reactivate(textbox.style) - show_first_line(context, textbox, text_overflow, x, y) + draw_first_line(context, textbox, text_overflow, block_ellipsis, x, y) + # Draw text decoration values = textbox.style['text_decoration_line'] - - thickness = textbox.style['font_size'] / 18 # Like other browsers do - color = textbox.style['text_decoration_color'] if 
color == 'currentColor': color = textbox.style['color'] - - if ('overline' in values or - 'line-through' in values or - 'underline' in values): - metrics = textbox.pango_layout.get_font_metrics() if 'overline' in values: - draw_text_decoration( - context, textbox, offset_x, - textbox.baseline - metrics.ascent + thickness / 2, - thickness, color) + thickness = textbox.pango_layout.underline_thickness + offset_y = ( + textbox.baseline - textbox.pango_layout.ascent + thickness / 2) if 'underline' in values: - draw_text_decoration( - context, textbox, offset_x, - textbox.baseline - metrics.underline_position + thickness / 2, - thickness, color) + thickness = textbox.pango_layout.underline_thickness + offset_y = ( + textbox.baseline - textbox.pango_layout.underline_position + + thickness / 2) if 'line-through' in values: + thickness = textbox.pango_layout.strikethrough_thickness + offset_y = ( + textbox.baseline - textbox.pango_layout.strikethrough_position) + if values != 'none': draw_text_decoration( - context, textbox, offset_x, - textbox.baseline - metrics.strikethrough_position, - thickness, color) + context, textbox, offset_x, offset_y, thickness, color) textbox.pango_layout.deactivate() +def draw_first_line(context, textbox, text_overflow, block_ellipsis, x, y): + """Draw the given ``textbox`` line to the document ``context``.""" + pango.pango_layout_set_single_paragraph_mode( + textbox.pango_layout.layout, True) + + if text_overflow == 'ellipsis' or block_ellipsis != 'none': + assert textbox.pango_layout.max_width is not None + max_width = textbox.pango_layout.max_width + pango.pango_layout_set_width( + textbox.pango_layout.layout, units_from_double(max_width)) + if text_overflow == 'ellipsis': + pango.pango_layout_set_ellipsize( + textbox.pango_layout.layout, pango.PANGO_ELLIPSIZE_END) + else: + if block_ellipsis == 'auto': + ellipsis = '…' + else: + assert block_ellipsis[0] == 'string' + ellipsis = block_ellipsis[1] + + # Remove last word if hyphenated + 
new_text = textbox.pango_layout.text + if new_text.endswith(textbox.style['hyphenate_character']): + last_word_end = get_last_word_end( + new_text[:-len(textbox.style['hyphenate_character'])], + textbox.style['lang']) + if last_word_end: + new_text = new_text[:last_word_end] + + textbox.pango_layout.set_text(new_text + ellipsis) + + first_line, second_line = textbox.pango_layout.get_first_line() + + if block_ellipsis != 'none': + while second_line: + last_word_end = get_last_word_end( + textbox.pango_layout.text[:-len(ellipsis)], + textbox.style['lang']) + if last_word_end is None: + break + new_text = textbox.pango_layout.text[:last_word_end] + textbox.pango_layout.set_text(new_text + ellipsis) + first_line, second_line = textbox.pango_layout.get_first_line() + + font_size = textbox.style['font_size'] + utf8_text = textbox.pango_layout.text.encode('utf-8') + previous_utf8_position = 0 + + runs = [first_line.runs[0]] + while runs[-1].next != ffi.NULL: + runs.append(runs[-1].next) + + context.text_matrix(font_size, 0, 0, -font_size, x, y) + last_font = None + string = '' + for run in runs: + # Pango objects + glyph_item = ffi.cast('PangoGlyphItem *', run.data) + glyph_string = glyph_item.glyphs + glyphs = glyph_string.glyphs + num_glyphs = glyph_string.num_glyphs + offset = glyph_item.item.offset + clusters = glyph_string.log_clusters + + # Font content + pango_font = glyph_item.item.analysis.font + pango_desc = pango.pango_font_describe(pango_font) + font_hash = ffi.string( + pango.pango_font_description_to_string(pango_desc)) + fonts = context.get_fonts() + if font_hash in fonts: + font = fonts[font_hash] + else: + hb_font = pango.pango_font_get_hb_font(pango_font) + hb_face = harfbuzz.hb_font_get_face(hb_font) + hb_blob = harfbuzz.hb_face_reference_blob(hb_face) + hb_data = harfbuzz.hb_blob_get_data(hb_blob, context.length) + file_content = ffi.unpack(hb_data, int(context.length[0])) + font = context.add_font(font_hash, file_content, pango_font) + + # Positions 
of the glyphs in the UTF-8 string + utf8_positions = [offset + clusters[i] for i in range(1, num_glyphs)] + utf8_positions.append(offset + glyph_item.item.length) + + # Go through the run glyphs + if font != last_font: + if string: + context.show_text(string) + string = '' + last_font = font + context.set_font_size(font.hash, 1) + string += '<' + for i in range(num_glyphs): + glyph = glyphs[i].glyph + width = glyphs[i].geometry.width + utf8_position = utf8_positions[i] + + offset = glyphs[i].geometry.x_offset / font_size + if offset: + string += f'>{-offset}<' + string += f'{glyph:04x}' + + # Ink bounding box and logical widths in font + if glyph not in font.widths: + pango.pango_font_get_glyph_extents( + pango_font, glyph, context.ink_rect, context.logical_rect) + x1, y1, x2, y2 = ( + context.ink_rect.x, + -context.ink_rect.y - context.ink_rect.height, + context.ink_rect.x + context.ink_rect.width, + -context.ink_rect.y) + if x1 < font.bbox[0]: + font.bbox[0] = int(units_to_double(x1 * 1000) / font_size) + if y1 < font.bbox[1]: + font.bbox[1] = int(units_to_double(y1 * 1000) / font_size) + if x2 > font.bbox[2]: + font.bbox[2] = int(units_to_double(x2 * 1000) / font_size) + if y2 > font.bbox[3]: + font.bbox[3] = int(units_to_double(y2 * 1000) / font_size) + font.widths[glyph] = int( + units_to_double(context.logical_rect.width * 1000) / + font_size) + + # Kerning, word spacing, letter spacing + kerning = int( + font.widths[glyph] - + units_to_double(width * 1000) / font_size + + offset) + if kerning: + string += f'>{kerning}<' + + # Mapping between glyphs and characters + if glyph not in font.cmap and glyph != pango.PANGO_GLYPH_EMPTY: + utf8_slice = slice(previous_utf8_position, utf8_position) + font.cmap[glyph] = utf8_text[utf8_slice].decode('utf-8') + previous_utf8_position = utf8_position + + # Close the last glyphs list, remove if empty + if string[-1] == '<': + string = string[:-1] + else: + string += '>' + + # Draw text + context.show_text(string) + + def 
draw_wave(context, x, y, width, offset_x, radius): up = 1 max_x = x + width diff --git a/weasyprint/formatting_structure/boxes.py b/weasyprint/formatting_structure/boxes.py index e1f001ee..00154c23 100644 --- a/weasyprint/formatting_structure/boxes.py +++ b/weasyprint/formatting_structure/boxes.py @@ -76,11 +76,13 @@ class Box: is_for_root_element = False is_column = False is_leader = False + is_attachment = False # Other properties transformation_matrix = None bookmark_label = None string_set = None + download_name = None # Default, overriden on some subclasses def all_children(self): @@ -338,7 +340,7 @@ class ParentBox(Box): """A flat generator for a box, its children and descendants.""" yield self for child in self.children: - if hasattr(child, 'descendants'): + if isinstance(child, ParentBox): for grand_child in child.descendants(): yield grand_child else: @@ -410,6 +412,7 @@ class LineBox(ParentBox): """ text_overflow = 'clip' + block_ellipsis = 'none' @classmethod def anonymous_from(cls, parent, *args, **kwargs): diff --git a/weasyprint/html.py b/weasyprint/html.py index b1bb5999..d288b109 100644 --- a/weasyprint/html.py +++ b/weasyprint/html.py @@ -49,7 +49,7 @@ def ascii_lower(string): This is used for `ASCII case-insensitive `_ matching. 
- This is different from the :meth:`~py:str.lower` method of Unicode strings + This is different from the :meth:`str.lower` method of Unicode strings which also affect non-ASCII characters, sometimes mapping them into the ASCII range: @@ -249,7 +249,7 @@ def handle_td(element, box, _get_image_from_uri, _base_url): def handle_a(element, box, _get_image_from_uri, base_url): """Handle the ``rel`` attribute.""" box.is_attachment = element_has_link_type(element, 'attachment') - box.attachment_download = element.get('download') + box.download_name = element.get('download') return [box] diff --git a/weasyprint/layout/absolute.py b/weasyprint/layout/absolute.py index 84e52eb7..a5062b44 100644 --- a/weasyprint/layout/absolute.py +++ b/weasyprint/layout/absolute.py @@ -206,7 +206,7 @@ def absolute_block(context, box, containing_block, fixed_boxes): new_box, _, _, _, _ = block_container_layout( context, box, max_position_y=float('inf'), skip_stack=None, page_is_empty=False, absolute_boxes=absolute_boxes, - fixed_boxes=fixed_boxes, adjoining_margins=None) + fixed_boxes=fixed_boxes, adjoining_margins=None, discard=False) for child_placeholder in absolute_boxes: absolute_layout(context, child_placeholder, new_box, fixed_boxes) diff --git a/weasyprint/layout/blocks.py b/weasyprint/layout/blocks.py index b487bec4..1250fc92 100644 --- a/weasyprint/layout/blocks.py +++ b/weasyprint/layout/blocks.py @@ -21,7 +21,7 @@ from .tables import table_layout, table_wrapper_width def block_level_layout(context, box, max_position_y, skip_stack, containing_block, page_is_empty, absolute_boxes, - fixed_boxes, adjoining_margins): + fixed_boxes, adjoining_margins, discard): """Lay out the block-level ``box``. 
:param max_position_y: the absolute vertical position (as in @@ -57,12 +57,12 @@ def block_level_layout(context, box, max_position_y, skip_stack, return block_level_layout_switch( context, box, max_position_y, skip_stack, containing_block, - page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins) + page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins, discard) def block_level_layout_switch(context, box, max_position_y, skip_stack, containing_block, page_is_empty, absolute_boxes, - fixed_boxes, adjoining_margins): + fixed_boxes, adjoining_margins, discard): """Call the layout function corresponding to the ``box`` type.""" if isinstance(box, boxes.TableBox): return table_layout( @@ -71,7 +71,8 @@ def block_level_layout_switch(context, box, max_position_y, skip_stack, elif isinstance(box, boxes.BlockBox): return block_box_layout( context, box, max_position_y, skip_stack, containing_block, - page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins) + page_is_empty, absolute_boxes, fixed_boxes, adjoining_margins, + discard) elif isinstance(box, boxes.BlockReplacedBox): box = block_replaced_box_layout(box, containing_block) # Don't collide with floats @@ -93,7 +94,7 @@ def block_level_layout_switch(context, box, max_position_y, skip_stack, def block_box_layout(context, box, max_position_y, skip_stack, containing_block, page_is_empty, absolute_boxes, - fixed_boxes, adjoining_margins): + fixed_boxes, adjoining_margins, discard): """Lay out the block ``box``.""" if (box.style['column_width'] != 'auto' or box.style['column_count'] != 'auto'): @@ -124,7 +125,7 @@ def block_box_layout(context, box, max_position_y, skip_stack, new_box, resume_at, next_page, adjoining_margins, collapsing_through = \ block_container_layout( context, box, max_position_y, skip_stack, page_is_empty, - absolute_boxes, fixed_boxes, adjoining_margins) + absolute_boxes, fixed_boxes, adjoining_margins, discard) if new_box and new_box.is_table_wrapper: # Don't collide with floats # 
http://www.w3.org/TR/CSS21/visuren.html#floats @@ -256,7 +257,7 @@ def relative_positioning(box, containing_block): def block_container_layout(context, box, max_position_y, skip_stack, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins=None): + adjoining_margins, discard): """Set the ``box`` height.""" # TODO: boxes.FlexBox is allowed here because flex_layout calls # block_container_layout, there's probably a better solution. @@ -273,10 +274,14 @@ def block_container_layout(context, box, max_position_y, skip_stack, is_start = skip_stack is None box.remove_decoration(start=not is_start, end=False) + discard |= box.style['continue'] == 'discard' + draw_bottom_decoration = ( + discard or box.style['box_decoration_break'] == 'clone') + if adjoining_margins is None: adjoining_margins = [] - if box.style['box_decoration_break'] == 'clone': + if draw_bottom_decoration: max_position_y -= ( box.padding_bottom + box.border_bottom_width + box.margin_bottom) @@ -371,14 +376,14 @@ def block_container_layout(context, box, max_position_y, skip_stack, new_containing_block, absolute_boxes, fixed_boxes, first_letter_style) is_page_break = False - for line, resume_at in lines_iterator: + for i, (line, resume_at) in enumerate(lines_iterator): line.resume_at = resume_at new_position_y = line.position_y + line.height # Add bottom padding and border to the bottom position of the # box if needed - if resume_at is None or ( - box.style['box_decoration_break'] == 'clone'): + draw_bottom_decoration |= resume_at is None + if draw_bottom_decoration: offset_y = box.border_bottom_width + box.padding_bottom else: offset_y = 0 @@ -431,6 +436,13 @@ def block_container_layout(context, box, max_position_y, skip_stack, new_children.append(line) position_y = new_position_y skip_stack = resume_at + + # Break box if we reached max-lines + if box.style['max_lines'] != 'none': + if i >= box.style['max_lines'] - 1: + line.block_ellipsis = box.style['block_ellipsis'] + break + if new_children: 
resume_at = (index, new_children[-1].resume_at) if is_page_break: @@ -505,7 +517,7 @@ def block_container_layout(context, box, max_position_y, skip_stack, collapsing_through) = block_level_layout( context, child, max_position_y, skip_stack, new_containing_block, page_is_empty_with_no_children, - absolute_boxes, fixed_boxes, adjoining_margins) + absolute_boxes, fixed_boxes, adjoining_margins, discard) skip_stack = None if new_child is not None: @@ -595,7 +607,11 @@ def block_container_layout(context, box, max_position_y, skip_stack, else: resume_at = None - if (resume_at is not None and + box_is_fragmented = resume_at is not None + if box.style['continue'] == 'discard': + resume_at = None + + if (box_is_fragmented and box.style['break_inside'] in ('avoid', 'avoid-page') and not page_is_empty): return ( @@ -638,8 +654,15 @@ def block_container_layout(context, box, max_position_y, skip_stack, position_y += collapse_margin(adjoining_margins) adjoining_margins = [] + # Add block ellipsis + if box_is_fragmented and new_children: + last_child = new_children[-1] + if isinstance(last_child, boxes.LineBox): + last_child.block_ellipsis = box.style['block_ellipsis'] + new_box = box.copy_with_children(new_children) - new_box.remove_decoration(start=not is_start, end=resume_at is not None) + new_box.remove_decoration( + start=not is_start, end=box_is_fragmented and not discard) # TODO: See corner cases in # http://www.w3.org/TR/CSS21/visudet.html#normal-block @@ -663,19 +686,19 @@ def block_container_layout(context, box, max_position_y, skip_stack, if not isinstance(new_box, boxes.BlockBox): context.finish_block_formatting_context(new_box) - if resume_at is None: + if discard or not box_is_fragmented: # After finish_block_formatting_context which may increment # new_box.height new_box.height = max( min(new_box.height, new_box.max_height), new_box.min_height) - else: + elif max_position_y < float('inf'): # Make the box fill the blank space at the bottom of the page # 
https://www.w3.org/TR/css-break-3/#box-splitting new_box.height = ( max_position_y - new_box.position_y - (new_box.margin_height() - new_box.height)) - if box.style['box_decoration_break'] == 'clone': + if draw_bottom_decoration: new_box.height += ( box.padding_bottom + box.border_bottom_width + box.margin_bottom) diff --git a/weasyprint/layout/columns.py b/weasyprint/layout/columns.py index 618ffdf8..458e8563 100644 --- a/weasyprint/layout/columns.py +++ b/weasyprint/layout/columns.py @@ -124,7 +124,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block, new_child, _, _, adjoining_margins, _ = block_level_layout( context, block, original_max_position_y, skip_stack, containing_block, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins) + adjoining_margins, discard=False) new_children.append(new_child) current_position_y = ( new_child.border_height() + new_child.border_box_y()) @@ -142,7 +142,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block, column_box = create_column_box(column_children) new_child, _, _, _, _ = block_box_layout( context, column_box, float('inf'), skip_stack, containing_block, - page_is_empty, [], [], []) + page_is_empty, [], [], [], discard=False) height = new_child.margin_height() if style['column_fill'] == 'balance': height /= count @@ -163,7 +163,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block, new_box, resume_at, next_page, _, _ = block_box_layout( context, column_box, box.content_box_y() + height, column_skip_stack, containing_block, page_is_empty, - [], [], []) + [], [], [], discard=False) if new_box is None: # We didn't render anything. Give up and use the max # content height. 
@@ -184,7 +184,8 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block, # Get the minimum size needed to render the next box next_box, _, _, _, _ = block_box_layout( context, column_box, box.content_box_y(), - column_skip_stack, containing_block, True, [], [], []) + column_skip_stack, containing_block, True, [], [], [], + discard=False) for child in next_box.children: if child.is_in_normal_flow(): next_box_size = child.margin_height() @@ -246,7 +247,7 @@ def columns_layout(context, box, max_position_y, skip_stack, containing_block, block_box_layout( context, column_box, max_position_y, skip_stack, containing_block, page_is_empty, absolute_boxes, - fixed_boxes, None)) + fixed_boxes, None, discard=False)) if new_child is None: break next_page = column_next_page diff --git a/weasyprint/layout/flex.py b/weasyprint/layout/flex.py index 6fb0d98d..c6d99d74 100644 --- a/weasyprint/layout/flex.py +++ b/weasyprint/layout/flex.py @@ -52,7 +52,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, else: main_space = max_position_y - box.position_y if containing_block.height != 'auto': - if hasattr(containing_block.height, 'unit'): + if isinstance(containing_block.height, Dimension): assert containing_block.height.unit == 'px' main_space = min(main_space, containing_block.height.value) else: @@ -69,7 +69,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, if cross == 'height': main_space = max_position_y - box.content_box_y() if containing_block.height != 'auto': - if hasattr(containing_block.height, 'unit'): + if isinstance(containing_block.height, Dimension): assert containing_block.height.unit == 'px' main_space = min(main_space, containing_block.height.value) else: @@ -154,7 +154,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, new_child.style['max_height'] = Dimension(float('inf'), 'px') new_child = blocks.block_level_layout( context, new_child, float('inf'), 
child_skip_stack, - parent_box, page_is_empty, [], [], [])[0] + parent_box, page_is_empty, [], [], [], False)[0] content_size = new_child.height child.min_height = min(specified_size, content_size) @@ -214,7 +214,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, new_child = blocks.block_level_layout( context, new_child, float('inf'), child_skip_stack, parent_box, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins=[])[0] + adjoining_margins=[], discard=False)[0] child.flex_base_size = new_child.margin_height() elif child.style[axis] == 'min-content': child.style[axis] = 'auto' @@ -229,7 +229,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, new_child = blocks.block_level_layout( context, new_child, float('inf'), child_skip_stack, parent_box, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins=[])[0] + adjoining_margins=[], discard=False)[0] child.flex_base_size = new_child.margin_height() else: assert child.style[axis].unit == 'px' @@ -463,7 +463,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, blocks.block_level_layout_switch( context, child_copy, float('inf'), child_skip_stack, parent_box, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins=[])) + adjoining_margins=[], discard=False)) child._baseline = find_in_flow_baseline(new_child) or 0 if cross == 'height': @@ -842,7 +842,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block, new_child, child_resume_at = blocks.block_level_layout_switch( context, child, max_position_y, child_skip_stack, box, page_is_empty, absolute_boxes, fixed_boxes, - adjoining_margins=[])[:2] + adjoining_margins=[], discard=False)[:2] if new_child is None: if resume_at and resume_at[0]: resume_at = (resume_at[0] + i - 1, None) diff --git a/weasyprint/layout/float.py b/weasyprint/layout/float.py index 23b8bfcc..4eed8af3 100644 --- a/weasyprint/layout/float.py +++ 
b/weasyprint/layout/float.py @@ -66,7 +66,7 @@ def float_layout(context, box, containing_block, absolute_boxes, fixed_boxes): context, box, max_position_y=float('inf'), skip_stack=None, page_is_empty=False, absolute_boxes=absolute_boxes, fixed_boxes=fixed_boxes, - adjoining_margins=None) + adjoining_margins=None, discard=False) context.finish_block_formatting_context(box) elif isinstance(box, boxes.FlexContainerBox): box, _, _, _, _ = flex_layout( diff --git a/weasyprint/layout/inlines.py b/weasyprint/layout/inlines.py index 2dcf183e..81e5b212 100644 --- a/weasyprint/layout/inlines.py +++ b/weasyprint/layout/inlines.py @@ -11,7 +11,7 @@ import unicodedata from ..css import computed_from_cascaded from ..css.computed_values import ex_ratio, strut_layout from ..formatting_structure import boxes -from ..text import can_break_text, create_layout, split_first_line +from ..text.line_break import can_break_text, create_layout, split_first_line from .absolute import AbsolutePlaceholder, absolute_layout from .flex import flex_layout from .float import avoid_collisions, float_layout @@ -498,8 +498,8 @@ def replaced_box_height(box): def inline_replaced_box_layout(box, containing_block): """Lay out an inline :class:`boxes.ReplacedBox` ``box``.""" for side in ['top', 'right', 'bottom', 'left']: - if getattr(box, 'margin_' + side) == 'auto': - setattr(box, 'margin_' + side, 0) + if getattr(box, f'margin_{side}') == 'auto': + setattr(box, f'margin_{side}', 0) inline_replaced_box_width_height(box, containing_block) @@ -616,7 +616,7 @@ def inline_block_box_layout(context, box, position_x, skip_stack, box, _, _, _, _ = block_container_layout( context, box, max_position_y=float('inf'), skip_stack=skip_stack, page_is_empty=True, absolute_boxes=absolute_boxes, - fixed_boxes=fixed_boxes) + fixed_boxes=fixed_boxes, adjoining_margins=None, discard=False) box.baseline = inline_block_baseline(box) return box @@ -719,8 +719,8 @@ def split_inline_level(context, box, position_x, max_x, 
skip_stack, box.position_x = position_x box.position_y = 0 for side in ['top', 'right', 'bottom', 'left']: - if getattr(box, 'margin_' + side) == 'auto': - setattr(box, 'margin_' + side, 0) + if getattr(box, f'margin_{side}') == 'auto': + setattr(box, f'margin_{side}', 0) new_box, resume_at, _, _, _ = flex_layout( context, box, float('inf'), skip_stack, containing_block, False, absolute_boxes, fixed_boxes) diff --git a/weasyprint/layout/pages.py b/weasyprint/layout/pages.py index 01483a8e..f8f4b4db 100644 --- a/weasyprint/layout/pages.py +++ b/weasyprint/layout/pages.py @@ -437,7 +437,8 @@ def margin_box_content_layout(context, page, box): box, resume_at, next_page, _, _ = block_container_layout( context, box, max_position_y=float('inf'), skip_stack=None, - page_is_empty=True, absolute_boxes=[], fixed_boxes=[]) + page_is_empty=True, absolute_boxes=[], fixed_boxes=[], + adjoining_margins=None, discard=False) assert resume_at is None vertical_align = box.style['vertical_align'] @@ -548,7 +549,7 @@ def make_page(context, root_box, page_type, resume_at, page_number, root_box, resume_at, next_page, _, _ = block_level_layout( context, root_box, page_content_bottom, resume_at, initial_containing_block, page_is_empty, positioned_boxes, - positioned_boxes, adjoining_margins) + positioned_boxes, adjoining_margins, discard=False) assert root_box page.fixed_boxes = [ diff --git a/weasyprint/layout/preferred.py b/weasyprint/layout/preferred.py index 1c298c12..41a185da 100644 --- a/weasyprint/layout/preferred.py +++ b/weasyprint/layout/preferred.py @@ -12,8 +12,8 @@ import sys -from .. 
import text from ..formatting_structure import boxes +from ..text.line_break import split_first_line from .replaced import default_image_sizing @@ -294,7 +294,7 @@ def inline_line_widths(context, box, outer, is_line_start, minimum, while new_resume_at is not None: resume_at += new_resume_at _, _, new_resume_at, width, _, _ = ( - text.split_first_line( + split_first_line( child_text[resume_at:], child.style, context, max_width, child.justification_spacing, minimum=True)) diff --git a/weasyprint/layout/tables.py b/weasyprint/layout/tables.py index cbb7bd8f..5e9c0ca4 100644 --- a/weasyprint/layout/tables.py +++ b/weasyprint/layout/tables.py @@ -142,12 +142,11 @@ def table_layout(context, table, max_position_y, skip_stack, containing_block, cell.computed_height = cell.height cell.height = 'auto' cell, _, _, _, _ = block_container_layout( - context, cell, - max_position_y=float('inf'), - skip_stack=None, - page_is_empty=False, + context, cell, max_position_y=float('inf'), + skip_stack=None, page_is_empty=False, absolute_boxes=absolute_boxes, - fixed_boxes=fixed_boxes) + fixed_boxes=fixed_boxes, adjoining_margins=None, + discard=False) cell.empty = not any( child.is_floated() or child.is_in_normal_flow() for child in cell.children) diff --git a/weasyprint/text.py b/weasyprint/text.py deleted file mode 100644 index b579c5b1..00000000 --- a/weasyprint/text.py +++ /dev/null @@ -1,1448 +0,0 @@ -""" - weasyprint.text - --------------- - - Interface with Pango to decide where to do line breaks and to draw text. - -""" - -import re - -import cffi -import pyphen - -from .logger import LOGGER - -ffi = cffi.FFI() -ffi.cdef(''' - // HarfBuzz - - typedef ... hb_font_t; - typedef ... hb_face_t; - typedef ... 
hb_blob_t; - hb_face_t * hb_font_get_face (hb_font_t *font); - hb_blob_t * hb_face_reference_blob (hb_face_t *face); - const char * hb_blob_get_data (hb_blob_t *blob, unsigned int *length); - - // Pango - - typedef unsigned int guint; - typedef int gint; - typedef char gchar; - typedef gint gboolean; - typedef void* gpointer; - typedef ... PangoLayout; - typedef ... PangoContext; - typedef ... PangoFontMap; - typedef ... PangoFontMetrics; - typedef ... PangoLanguage; - typedef ... PangoTabArray; - typedef ... PangoFontDescription; - typedef ... PangoLayoutIter; - typedef ... PangoAttrList; - typedef ... PangoAttrClass; - typedef ... PangoFont; - typedef guint PangoGlyph; - typedef guint PangoGlyphUnit; - - const guint PANGO_GLYPH_EMPTY = 0x0FFFFFFF; - - typedef enum { - PANGO_STYLE_NORMAL, - PANGO_STYLE_OBLIQUE, - PANGO_STYLE_ITALIC - } PangoStyle; - - typedef enum { - PANGO_WEIGHT_THIN = 100, - PANGO_WEIGHT_ULTRALIGHT = 200, - PANGO_WEIGHT_LIGHT = 300, - PANGO_WEIGHT_BOOK = 380, - PANGO_WEIGHT_NORMAL = 400, - PANGO_WEIGHT_MEDIUM = 500, - PANGO_WEIGHT_SEMIBOLD = 600, - PANGO_WEIGHT_BOLD = 700, - PANGO_WEIGHT_ULTRABOLD = 800, - PANGO_WEIGHT_HEAVY = 900, - PANGO_WEIGHT_ULTRAHEAVY = 1000 - } PangoWeight; - - typedef enum { - PANGO_STRETCH_ULTRA_CONDENSED, - PANGO_STRETCH_EXTRA_CONDENSED, - PANGO_STRETCH_CONDENSED, - PANGO_STRETCH_SEMI_CONDENSED, - PANGO_STRETCH_NORMAL, - PANGO_STRETCH_SEMI_EXPANDED, - PANGO_STRETCH_EXPANDED, - PANGO_STRETCH_EXTRA_EXPANDED, - PANGO_STRETCH_ULTRA_EXPANDED - } PangoStretch; - - typedef enum { - PANGO_WRAP_WORD, - PANGO_WRAP_CHAR, - PANGO_WRAP_WORD_CHAR - } PangoWrapMode; - - typedef enum { - PANGO_TAB_LEFT - } PangoTabAlign; - - typedef enum { - PANGO_ELLIPSIZE_NONE, - PANGO_ELLIPSIZE_START, - PANGO_ELLIPSIZE_MIDDLE, - PANGO_ELLIPSIZE_END - } PangoEllipsizeMode; - - typedef struct GSList { - gpointer data; - struct GSList *next; - } GSList; - - typedef struct { - const PangoAttrClass *klass; - guint start_index; - guint end_index; - } 
PangoAttribute; - - typedef struct { - PangoLayout *layout; - gint start_index; - gint length; - GSList *runs; - guint is_paragraph_start : 1; - guint resolved_dir : 3; - } PangoLayoutLine; - - typedef struct { - int x; - int y; - int width; - int height; - } PangoRectangle; - - typedef struct { - guint is_line_break: 1; - guint is_mandatory_break : 1; - guint is_char_break : 1; - guint is_white : 1; - guint is_cursor_position : 1; - guint is_word_start : 1; - guint is_word_end : 1; - guint is_sentence_boundary : 1; - guint is_sentence_start : 1; - guint is_sentence_end : 1; - guint backspace_deletes_character : 1; - guint is_expandable_space : 1; - guint is_word_boundary : 1; - } PangoLogAttr; - - typedef struct { - void *shape_engine; - void *lang_engine; - PangoFont *font; - guint level; - guint gravity; - guint flags; - guint script; - PangoLanguage *language; - GSList *extra_attrs; - } PangoAnalysis; - - typedef struct { - gint offset; - gint length; - gint num_chars; - PangoAnalysis analysis; - } PangoItem; - - typedef struct { - PangoGlyphUnit width; - PangoGlyphUnit x_offset; - PangoGlyphUnit y_offset; - } PangoGlyphGeometry; - - typedef struct { - guint is_cluster_start : 1; - } PangoGlyphVisAttr; - - typedef struct { - PangoGlyph glyph; - PangoGlyphGeometry geometry; - PangoGlyphVisAttr attr; - } PangoGlyphInfo; - - typedef struct { - gint num_glyphs; - PangoGlyphInfo *glyphs; - gint *log_clusters; - } PangoGlyphString; - - typedef struct { - PangoItem *item; - PangoGlyphString *glyphs; - } PangoGlyphItem; - - int pango_version (void); - - double pango_units_to_double (int i); - int pango_units_from_double (double d); - void g_object_unref (gpointer object); - void g_type_init (void); - - PangoLayout * pango_layout_new (PangoContext *context); - void pango_layout_set_width (PangoLayout *layout, int width); - PangoAttrList * pango_layout_get_attributes(PangoLayout *layout); - void pango_layout_set_attributes ( - PangoLayout *layout, PangoAttrList *attrs); 
- void pango_layout_set_text ( - PangoLayout *layout, const char *text, int length); - void pango_layout_set_tabs ( - PangoLayout *layout, PangoTabArray *tabs); - void pango_layout_set_font_description ( - PangoLayout *layout, const PangoFontDescription *desc); - void pango_layout_set_wrap ( - PangoLayout *layout, PangoWrapMode wrap); - void pango_layout_set_single_paragraph_mode ( - PangoLayout *layout, gboolean setting); - int pango_layout_get_baseline (PangoLayout *layout); - PangoLayoutLine * pango_layout_get_line_readonly ( - PangoLayout *layout, int line); - - hb_font_t * pango_font_get_hb_font (PangoFont *font); - - PangoFontDescription * pango_font_description_new (void); - void pango_font_description_free (PangoFontDescription *desc); - PangoFontDescription * pango_font_description_copy ( - const PangoFontDescription *desc); - void pango_font_description_set_family ( - PangoFontDescription *desc, const char *family); - void pango_font_description_set_style ( - PangoFontDescription *desc, PangoStyle style); - void pango_font_description_set_stretch ( - PangoFontDescription *desc, PangoStretch stretch); - void pango_font_description_set_weight ( - PangoFontDescription *desc, PangoWeight weight); - void pango_font_description_set_absolute_size ( - PangoFontDescription *desc, double size); - int pango_font_description_get_size (PangoFontDescription *desc); - - int pango_glyph_string_get_width (PangoGlyphString *glyphs); - - PangoFontDescription * pango_font_describe (PangoFont *font); - const char * pango_font_description_get_family ( - const PangoFontDescription *desc); - - PangoContext * pango_context_new (); - PangoContext * pango_font_map_create_context (PangoFontMap *fontmap); - - PangoFontMetrics * pango_context_get_metrics ( - PangoContext *context, const PangoFontDescription *desc, - PangoLanguage *language); - void pango_font_metrics_unref (PangoFontMetrics *metrics); - int pango_font_metrics_get_ascent (PangoFontMetrics *metrics); - int 
pango_font_metrics_get_descent (PangoFontMetrics *metrics); - int pango_font_metrics_get_approximate_char_width ( - PangoFontMetrics *metrics); - int pango_font_metrics_get_approximate_digit_width ( - PangoFontMetrics *metrics); - int pango_font_metrics_get_underline_thickness ( - PangoFontMetrics *metrics); - int pango_font_metrics_get_underline_position ( - PangoFontMetrics *metrics); - int pango_font_metrics_get_strikethrough_thickness ( - PangoFontMetrics *metrics); - int pango_font_metrics_get_strikethrough_position ( - PangoFontMetrics *metrics); - - void pango_context_set_round_glyph_positions ( - PangoContext *context, gboolean round_positions); - - PangoFontMetrics * pango_font_get_metrics ( - PangoFont *font, PangoLanguage *language); - - void pango_font_get_glyph_extents ( - PangoFont *font, PangoGlyph glyph, PangoRectangle *ink_rect, - PangoRectangle *logical_rect); - - PangoAttrList * pango_attr_list_new (void); - void pango_attr_list_unref (PangoAttrList *list); - void pango_attr_list_insert ( - PangoAttrList *list, PangoAttribute *attr); - void pango_attr_list_change ( - PangoAttrList *list, PangoAttribute *attr); - PangoAttribute * pango_attr_font_features_new (const gchar *features); - PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing); - void pango_attribute_destroy (PangoAttribute *attr); - - PangoTabArray * pango_tab_array_new_with_positions ( - gint size, gboolean positions_in_pixels, PangoTabAlign first_alignment, - gint first_position, ...); - void pango_tab_array_free (PangoTabArray *tab_array); - - PangoLanguage * pango_language_from_string (const char *language); - PangoLanguage * pango_language_get_default (void); - void pango_context_set_language ( - PangoContext *context, PangoLanguage *language); - void pango_context_set_font_map ( - PangoContext *context, PangoFontMap *font_map); - - void pango_layout_line_get_extents ( - PangoLayoutLine *line, - PangoRectangle *ink_rect, PangoRectangle *logical_rect); - - 
PangoContext * pango_layout_get_context (PangoLayout *layout); - void pango_layout_set_ellipsize ( - PangoLayout *layout, - PangoEllipsizeMode ellipsize); - - void pango_get_log_attrs ( - const char *text, int length, int level, PangoLanguage *language, - PangoLogAttr *log_attrs, int attrs_len); -''') - - -def dlopen(ffi, *names): - """Try various names for the same library, for different platforms.""" - for name in names: - try: - return ffi.dlopen(name) - except OSError: - pass - # Re-raise the exception. - return ffi.dlopen(names[0]) # pragma: no cover - - -gobject = dlopen(ffi, 'gobject-2.0-0', 'gobject-2.0', 'libgobject-2.0-0', - 'libgobject-2.0.so.0', 'libgobject-2.0.dylib') -pango = dlopen(ffi, 'pango-1.0-0', 'pango-1.0', 'libpango-1.0-0', - 'libpango-1.0.so.0', 'libpango-1.0.dylib') -harfbuzz = dlopen( - ffi, 'harfbuzz', 'harfbuzz-0.0', 'libharfbuzz-0', - 'libharfbuzz.so.0', 'libharfbuzz.so.0', 'libharfbuzz.0.dylib') - -gobject.g_type_init() - -units_to_double = pango.pango_units_to_double -units_from_double = pango.pango_units_from_double - - -PANGO_STYLE = { - 'normal': pango.PANGO_STYLE_NORMAL, - 'oblique': pango.PANGO_STYLE_OBLIQUE, - 'italic': pango.PANGO_STYLE_ITALIC, -} - -PANGO_STRETCH = { - 'ultra-condensed': pango.PANGO_STRETCH_ULTRA_CONDENSED, - 'extra-condensed': pango.PANGO_STRETCH_EXTRA_CONDENSED, - 'condensed': pango.PANGO_STRETCH_CONDENSED, - 'semi-condensed': pango.PANGO_STRETCH_SEMI_CONDENSED, - 'normal': pango.PANGO_STRETCH_NORMAL, - 'semi-expanded': pango.PANGO_STRETCH_SEMI_EXPANDED, - 'expanded': pango.PANGO_STRETCH_EXPANDED, - 'extra-expanded': pango.PANGO_STRETCH_EXTRA_EXPANDED, - 'ultra-expanded': pango.PANGO_STRETCH_ULTRA_EXPANDED, -} - -PANGO_WRAP_MODE = { - 'WRAP_WORD': pango.PANGO_WRAP_WORD, - 'WRAP_CHAR': pango.PANGO_WRAP_CHAR, - 'WRAP_WORD_CHAR': pango.PANGO_WRAP_WORD_CHAR -} - -# From http://www.microsoft.com/typography/otspec/languagetags.htm -LST_TO_ISO = { - 'aba': 'abq', - 'afk': 'afr', - 'afr': 'aar', - 'agw': 'ahg', - 
'als': 'gsw', - 'alt': 'atv', - 'ari': 'aiw', - 'ark': 'mhv', - 'ath': 'apk', - 'avr': 'ava', - 'bad': 'bfq', - 'bad0': 'bad', - 'bag': 'bfy', - 'bal': 'krc', - 'bau': 'bci', - 'bch': 'bcq', - 'bgr': 'bul', - 'bil': 'byn', - 'bkf': 'bla', - 'bli': 'bal', - 'bln': 'bjt', - 'blt': 'bft', - 'bmb': 'bam', - 'bri': 'bra', - 'brm': 'mya', - 'bsh': 'bak', - 'bti': 'btb', - 'chg': 'sgw', - 'chh': 'hne', - 'chi': 'nya', - 'chk': 'ckt', - 'chk0': 'chk', - 'chu': 'chv', - 'chy': 'chy', - 'cmr': 'swb', - 'crr': 'crx', - 'crt': 'crh', - 'csl': 'chu', - 'csy': 'ces', - 'dcr': 'cwd', - 'dgr': 'doi', - 'djr': 'dje', - 'djr0': 'djr', - 'dng': 'ada', - 'dnk': 'din', - 'dri': 'prs', - 'dun': 'dng', - 'dzn': 'dzo', - 'ebi': 'igb', - 'ecr': 'crj', - 'edo': 'bin', - 'erz': 'myv', - 'esp': 'spa', - 'eti': 'est', - 'euq': 'eus', - 'evk': 'evn', - 'evn': 'eve', - 'fan': 'acf', - 'fan0': 'fan', - 'far': 'fas', - 'fji': 'fij', - 'fle': 'vls', - 'fne': 'enf', - 'fos': 'fao', - 'fri': 'fry', - 'frl': 'fur', - 'frp': 'frp', - 'fta': 'fuf', - 'gad': 'gaa', - 'gae': 'gla', - 'gal': 'glg', - 'gaw': 'gbm', - 'gil': 'niv', - 'gil0': 'gil', - 'gmz': 'guk', - 'grn': 'kal', - 'gro': 'grt', - 'gua': 'grn', - 'hai': 'hat', - 'hal': 'flm', - 'har': 'hoj', - 'hbn': 'amf', - 'hma': 'mrj', - 'hnd': 'hno', - 'ho': 'hoc', - 'hri': 'har', - 'hye0': 'hye', - 'ijo': 'ijc', - 'ing': 'inh', - 'inu': 'iku', - 'iri': 'gle', - 'irt': 'gle', - 'ism': 'smn', - 'iwr': 'heb', - 'jan': 'jpn', - 'jii': 'yid', - 'jud': 'lad', - 'jul': 'dyu', - 'kab': 'kbd', - 'kab0': 'kab', - 'kac': 'kfr', - 'kal': 'kln', - 'kar': 'krc', - 'keb': 'ktb', - 'kge': 'kat', - 'kha': 'kjh', - 'khk': 'kca', - 'khs': 'kca', - 'khv': 'kca', - 'kis': 'kqs', - 'kkn': 'kex', - 'klm': 'xal', - 'kmb': 'kam', - 'kmn': 'kfy', - 'kmo': 'kmw', - 'kms': 'kxc', - 'knr': 'kau', - 'kod': 'kfa', - 'koh': 'okm', - 'kon': 'ktu', - 'kon0': 'kon', - 'kop': 'koi', - 'koz': 'kpv', - 'kpl': 'kpe', - 'krk': 'kaa', - 'krm': 'kdr', - 'krn': 'kar', - 'krt': 'kqy', - 'ksh': 
'kas', - 'ksh0': 'ksh', - 'ksi': 'kha', - 'ksm': 'sjd', - 'kui': 'kxu', - 'kul': 'kfx', - 'kuu': 'kru', - 'kuy': 'kdt', - 'kyk': 'kpy', - 'lad': 'lld', - 'lah': 'bfu', - 'lak': 'lbe', - 'lam': 'lmn', - 'laz': 'lzz', - 'lcr': 'crm', - 'ldk': 'lbj', - 'lma': 'mhr', - 'lmb': 'lif', - 'lmw': 'ngl', - 'lsb': 'dsb', - 'lsm': 'smj', - 'lth': 'lit', - 'luh': 'luy', - 'lvi': 'lav', - 'maj': 'mpe', - 'mak': 'vmw', - 'man': 'mns', - 'map': 'arn', - 'maw': 'mwr', - 'mbn': 'kmb', - 'mch': 'mnc', - 'mcr': 'crm', - 'mde': 'men', - 'men': 'mym', - 'miz': 'lus', - 'mkr': 'mak', - 'mle': 'mdy', - 'mln': 'mlq', - 'mlr': 'mal', - 'mly': 'msa', - 'mnd': 'mnk', - 'mng': 'mon', - 'mnk': 'man', - 'mnx': 'glv', - 'mok': 'mdf', - 'mon': 'mnw', - 'mth': 'mai', - 'mts': 'mlt', - 'mun': 'unr', - 'nan': 'gld', - 'nas': 'nsk', - 'ncr': 'csw', - 'ndg': 'ndo', - 'nhc': 'csw', - 'nis': 'dap', - 'nkl': 'nyn', - 'nko': 'nqo', - 'nor': 'nob', - 'nsm': 'sme', - 'nta': 'nod', - 'nto': 'epo', - 'nyn': 'nno', - 'ocr': 'ojs', - 'ojb': 'oji', - 'oro': 'orm', - 'paa': 'sam', - 'pal': 'pli', - 'pap': 'plp', - 'pap0': 'pap', - 'pas': 'pus', - 'pgr': 'ell', - 'pil': 'fil', - 'plg': 'pce', - 'plk': 'pol', - 'ptg': 'por', - 'qin': 'bgr', - 'rbu': 'bxr', - 'rcr': 'atj', - 'rms': 'roh', - 'rom': 'ron', - 'roy': 'rom', - 'rsy': 'rue', - 'rua': 'kin', - 'sad': 'sck', - 'say': 'chp', - 'sek': 'xan', - 'sel': 'sel', - 'sgo': 'sag', - 'sgs': 'sgs', - 'sib': 'sjo', - 'sig': 'xst', - 'sks': 'sms', - 'sky': 'slk', - 'sla': 'scs', - 'sml': 'som', - 'sna': 'seh', - 'sna0': 'sna', - 'snh': 'sin', - 'sog': 'gru', - 'srb': 'srp', - 'ssl': 'xsl', - 'ssm': 'sma', - 'sur': 'suq', - 'sve': 'swe', - 'swa': 'aii', - 'swk': 'swa', - 'swz': 'ssw', - 'sxt': 'ngo', - 'taj': 'tgk', - 'tcr': 'cwd', - 'tgn': 'ton', - 'tgr': 'tig', - 'tgy': 'tir', - 'tht': 'tah', - 'tib': 'bod', - 'tkm': 'tuk', - 'tmn': 'tem', - 'tna': 'tsn', - 'tne': 'enh', - 'tng': 'toi', - 'tod': 'xal', - 'tod0': 'tod', - 'trk': 'tur', - 'tsg': 'tso', - 'tua': 'tru', - 
'tul': 'tcy', - 'tuv': 'tyv', - 'twi': 'aka', - 'usb': 'hsb', - 'uyg': 'uig', - 'vit': 'vie', - 'vro': 'vro', - 'wa': 'wbm', - 'wag': 'wbr', - 'wcr': 'crk', - 'wel': 'cym', - 'wlf': 'wol', - 'xbd': 'khb', - 'xhs': 'xho', - 'yak': 'sah', - 'yba': 'yor', - 'ycr': 'cre', - 'yim': 'iii', - 'zhh': 'zho', - 'zhp': 'zho', - 'zhs': 'zho', - 'zht': 'zho', - 'znd': 'zne', -} - - -def utf8_slice(string, slice_): - return string.encode('utf-8')[slice_].decode('utf-8') - - -def unicode_to_char_p(string): - """Return ``(pointer, bytestring)``. - - The byte string must live at least as long as the pointer is used. - - """ - bytestring = string.encode('utf8').replace(b'\x00', b'') - return ffi.new('char[]', bytestring), bytestring - - -def get_size(line, style): - logical_extents = ffi.new('PangoRectangle *') - pango.pango_layout_line_get_extents(line, ffi.NULL, logical_extents) - width, height = (units_to_double(logical_extents.width), - units_to_double(logical_extents.height)) - ffi.release(logical_extents) - if style['letter_spacing'] != 'normal': - width += style['letter_spacing'] - return width, height - - -def get_ink_position(line): - ink_extents = ffi.new('PangoRectangle *') - pango.pango_layout_line_get_extents(line, ink_extents, ffi.NULL) - values = (units_to_double(ink_extents.x), units_to_double(ink_extents.y)) - ffi.release(ink_extents) - return values - - -def first_line_metrics(first_line, text, layout, resume_at, space_collapse, - style, hyphenated=False, hyphenation_character=None): - length = first_line.length - if hyphenated: - length -= len(hyphenation_character.encode('utf8')) - elif resume_at: - # Set an infinite width as we don't want to break lines when drawing, - # the lines have already been split and the size may differ. Rendering - # is also much faster when no width is set. 
- pango.pango_layout_set_width(layout.layout, -1) - - # Create layout with final text - first_line_text = utf8_slice(text, slice(length)) - - # Remove trailing spaces if spaces collapse - if space_collapse: - first_line_text = first_line_text.rstrip(' ') - - # Remove soft hyphens - layout.set_text(first_line_text.replace('\u00ad', '')) - - first_line, _ = layout.get_first_line() - length = first_line.length if first_line is not None else 0 - - if '\u00ad' in first_line_text: - soft_hyphens = 0 - if first_line_text[0] == '\u00ad': - length += 2 # len('\u00ad'.encode('utf8')) - for i in range(len(layout.text)): - while i + soft_hyphens + 1 < len(first_line_text): - if first_line_text[i + soft_hyphens + 1] == '\u00ad': - soft_hyphens += 1 - else: - break - length += soft_hyphens * 2 # len('\u00ad'.encode('utf8')) - - width, height = get_size(first_line, style) - baseline = units_to_double(pango.pango_layout_get_baseline(layout.layout)) - layout.deactivate() - return layout, length, resume_at, width, height, baseline - - -class Layout: - """Object holding PangoLayout-related cdata pointers.""" - def __init__(self, context, font_size, style, justification_spacing=0, - max_width=None): - self.justification_spacing = justification_spacing - self.setup(context, font_size, style) - self.max_width = max_width - - def setup(self, context, font_size, style): - self.context = context - self.style = style - self.first_line_direction = 0 - - if context is None: - # TODO: fix this ugly import - from .fonts import pangoft2 - font_map = ffi.gc( - pangoft2.pango_ft2_font_map_new(), gobject.g_object_unref) - else: - font_map = context.font_config.font_map - pango_context = pango.pango_font_map_create_context(font_map) - pango.pango_context_set_round_glyph_positions(pango_context, False) - self.layout = ffi.gc( - pango.pango_layout_new(pango_context), - gobject.g_object_unref) - - if style['font_language_override'] != 'normal': - lang_p, lang = unicode_to_char_p(LST_TO_ISO.get( - 
style['font_language_override'].lower(), - style['font_language_override'])) - elif style['lang']: - lang_p, lang = unicode_to_char_p(style['lang']) - else: - lang = None - self.language = pango.pango_language_get_default() - if lang: - self.language = pango.pango_language_from_string(lang_p) - pango.pango_context_set_language(pango_context, self.language) - gobject.g_object_unref(pango_context) - - assert not isinstance(style['font_family'], str), ( - 'font_family should be a list') - self.font = ffi.gc( - pango.pango_font_description_new(), - pango.pango_font_description_free) - family_p, family = unicode_to_char_p(','.join(style['font_family'])) - pango.pango_font_description_set_family(self.font, family_p) - pango.pango_font_description_set_style( - self.font, PANGO_STYLE[style['font_style']]) - pango.pango_font_description_set_stretch( - self.font, PANGO_STRETCH[style['font_stretch']]) - pango.pango_font_description_set_weight( - self.font, style['font_weight']) - pango.pango_font_description_set_absolute_size( - self.font, units_from_double(font_size)) - pango.pango_layout_set_font_description(self.layout, self.font) - - features = get_font_features( - style['font_kerning'], style['font_variant_ligatures'], - style['font_variant_position'], style['font_variant_caps'], - style['font_variant_numeric'], style['font_variant_alternates'], - style['font_variant_east_asian'], style['font_feature_settings']) - if features and context: - features = ','.join( - f'{key} {value}' for key, value in features.items()) - - # TODO: attributes should be freed. - # In the meantime, keep a cache to avoid leaking too many of them. 
- attr = context.font_features.get(features) - if attr is None: - try: - attr = pango.pango_attr_font_features_new( - features.encode('ascii')) - except AttributeError: - LOGGER.error( - 'OpenType features are not available ' - 'with Pango < 1.38') - else: - context.font_features[features] = attr - if attr is not None: - attr_list = pango.pango_attr_list_new() - pango.pango_attr_list_insert(attr_list, attr) - pango.pango_layout_set_attributes(self.layout, attr_list) - - def get_first_line(self): - first_line = pango.pango_layout_get_line_readonly(self.layout, 0) - second_line = pango.pango_layout_get_line_readonly(self.layout, 1) - if second_line != ffi.NULL: - index = second_line.start_index - else: - index = None - self.first_line_direction = first_line.resolved_dir - return first_line, index - - def set_text(self, text, justify=False): - try: - # Keep only the first line plus one character, we don't need more - text = text[:text.index('\n') + 2] - except ValueError: - # End-of-line not found, keept the whole text - pass - text, bytestring = unicode_to_char_p(text) - self.text = bytestring.decode('utf-8') - pango.pango_layout_set_text(self.layout, text, -1) - - # Word spacing may not be set if we're trying to get word-spacing - # computed value using a layout, for example if its unit is ex. - word_spacing = self.style.get('word_spacing', 0) - if justify: - # Justification is needed when drawing text but is useless during - # layout. Ignore it before layout is reactivated before the drawing - # step. - word_spacing += self.justification_spacing - - # Letter spacing may not be set if we're trying to get letter-spacing - # computed value using a layout, for example if its unit is ex. 
- letter_spacing = self.style.get('letter_spacing', 'normal') - if letter_spacing == 'normal': - letter_spacing = 0 - - if text and (word_spacing != 0 or letter_spacing != 0): - letter_spacing = units_from_double(letter_spacing) - space_spacing = units_from_double(word_spacing) + letter_spacing - attr_list = pango.pango_layout_get_attributes(self.layout) - if not attr_list: - # TODO: list should be freed - attr_list = pango.pango_attr_list_new() - - def add_attr(start, end, spacing): - # TODO: attributes should be freed - attr = pango.pango_attr_letter_spacing_new(spacing) - attr.start_index, attr.end_index = start, end - pango.pango_attr_list_change(attr_list, attr) - - add_attr(0, len(bytestring) + 1, letter_spacing) - position = bytestring.find(b' ') - while position != -1: - add_attr(position, position + 1, space_spacing) - position = bytestring.find(b' ', position + 1) - - pango.pango_layout_set_attributes(self.layout, attr_list) - - # Tabs width - if b'\t' in bytestring: - self.set_tabs() - - def get_font_metrics(self): - context = pango.pango_layout_get_context(self.layout) - return FontMetrics(context, self.font, self.language) - - def set_wrap(self, wrap_mode): - pango.pango_layout_set_wrap(self.layout, wrap_mode) - - def set_tabs(self): - if isinstance(self.style['tab_size'], int): - layout = Layout( - self.context, self.style['font_size'], self.style, - self.justification_spacing) - layout.set_text(' ' * self.style['tab_size']) - line, _ = layout.get_first_line() - width, _ = get_size(line, self.style) - width = int(round(width)) - else: - width = int(self.style['tab_size'].value) - # 0 is not handled correctly by Pango - array = ffi.gc( - pango.pango_tab_array_new_with_positions( - 1, True, pango.PANGO_TAB_LEFT, width or 1), - pango.pango_tab_array_free) - pango.pango_layout_set_tabs(self.layout, array) - - def deactivate(self): - del self.layout, self.font, self.language, self.style - - def reactivate(self, style): - self.setup(self.context, 
style['font_size'], style) - self.set_text(self.text, justify=True) - - -class FontMetrics: - def __init__(self, context, font, language): - self.metrics = ffi.gc( - pango.pango_context_get_metrics(context, font, language), - pango.pango_font_metrics_unref) - - def __dir__(self): - return ['ascent', 'descent', - 'approximate_char_width', 'approximate_digit_width', - 'underline_thickness', 'underline_position', - 'strikethrough_thickness', 'strikethrough_position'] - - def __getattr__(self, key): - if key in dir(self): - return units_to_double( - getattr(pango, 'pango_font_metrics_get_' + key)(self.metrics)) - - -def get_font_features( - font_kerning='normal', font_variant_ligatures='normal', - font_variant_position='normal', font_variant_caps='normal', - font_variant_numeric='normal', font_variant_alternates='normal', - font_variant_east_asian='normal', font_feature_settings='normal'): - """Get the font features from the different properties in style. - - See https://www.w3.org/TR/css-fonts-3/#feature-precedence - - """ - features = {} - ligature_keys = { - 'common-ligatures': ['liga', 'clig'], - 'historical-ligatures': ['hlig'], - 'discretionary-ligatures': ['dlig'], - 'contextual': ['calt']} - caps_keys = { - 'small-caps': ['smcp'], - 'all-small-caps': ['c2sc', 'smcp'], - 'petite-caps': ['pcap'], - 'all-petite-caps': ['c2pc', 'pcap'], - 'unicase': ['unic'], - 'titling-caps': ['titl']} - numeric_keys = { - 'lining-nums': 'lnum', - 'oldstyle-nums': 'onum', - 'proportional-nums': 'pnum', - 'tabular-nums': 'tnum', - 'diagonal-fractions': 'frac', - 'stacked-fractions': 'afrc', - 'ordinal': 'ordn', - 'slashed-zero': 'zero'} - east_asian_keys = { - 'jis78': 'jp78', - 'jis83': 'jp83', - 'jis90': 'jp90', - 'jis04': 'jp04', - 'simplified': 'smpl', - 'traditional': 'trad', - 'full-width': 'fwid', - 'proportional-width': 'pwid', - 'ruby': 'ruby'} - - # Step 1: getting the default, we rely on Pango for this - # Step 2: @font-face font-variant, done in fonts.add_font_face - # 
Step 3: @font-face font-feature-settings, done in fonts.add_font_face - - # Step 4: font-variant and OpenType features - - if font_kerning != 'auto': - features['kern'] = int(font_kerning == 'normal') - - if font_variant_ligatures == 'none': - for keys in ligature_keys.values(): - for key in keys: - features[key] = 0 - elif font_variant_ligatures != 'normal': - for ligature_type in font_variant_ligatures: - value = 1 - if ligature_type.startswith('no-'): - value = 0 - ligature_type = ligature_type[3:] - for key in ligature_keys[ligature_type]: - features[key] = value - - if font_variant_position == 'sub': - # TODO: the specification asks for additional checks - # https://www.w3.org/TR/css-fonts-3/#font-variant-position-prop - features['subs'] = 1 - elif font_variant_position == 'super': - features['sups'] = 1 - - if font_variant_caps != 'normal': - # TODO: the specification asks for additional checks - # https://www.w3.org/TR/css-fonts-3/#font-variant-caps-prop - for key in caps_keys[font_variant_caps]: - features[key] = 1 - - if font_variant_numeric != 'normal': - for key in font_variant_numeric: - features[numeric_keys[key]] = 1 - - if font_variant_alternates != 'normal': - # TODO: support other values - # See https://www.w3.org/TR/css-fonts-3/#font-variant-caps-prop - if font_variant_alternates == 'historical-forms': - features['hist'] = 1 - - if font_variant_east_asian != 'normal': - for key in font_variant_east_asian: - features[east_asian_keys[key]] = 1 - - # Step 5: incompatible non-OpenType features, already handled by Pango - - # Step 6: font-feature-settings - - if font_feature_settings != 'normal': - features.update(dict(font_feature_settings)) - - return features - - -def create_layout(text, style, context, max_width, justification_spacing): - """Return an opaque Pango layout with default Pango line-breaks. 
- - :param text: Unicode - :param style: a style dict of computed values - :param max_width: - The maximum available width in the same unit as ``style['font_size']``, - or ``None`` for unlimited width. - - """ - layout = Layout( - context, style['font_size'], style, justification_spacing, max_width) - - # Make sure that max_width * Pango.SCALE == max_width * 1024 fits in a - # signed integer. Treat bigger values same as None: unconstrained width. - text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line') - if max_width is not None and text_wrap and max_width < 2 ** 21: - pango.pango_layout_set_width( - layout.layout, units_from_double(max(0, max_width))) - - layout.set_text(text) - return layout - - -def split_first_line(text, style, context, max_width, justification_spacing, - minimum=False): - """Fit as much as possible in the available width for one line of text. - - Return ``(layout, length, resume_at, width, height, baseline)``. - - ``layout``: a pango Layout with the first line - ``length``: length in UTF-8 bytes of the first line - ``resume_at``: The number of UTF-8 bytes to skip for the next line. - May be ``None`` if the whole text fits in one line. - This may be greater than ``length`` in case of preserved - newline characters. 
- ``width``: width in pixels of the first line - ``height``: height in pixels of the first line - ``baseline``: baseline in pixels of the first line - - """ - # See https://www.w3.org/TR/css-text-3/#white-space-property - text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line') - space_collapse = style['white_space'] in ('normal', 'nowrap', 'pre-line') - - original_max_width = max_width - if not text_wrap: - max_width = None - - # Step #1: Get a draft layout with the first line - layout = None - if (max_width is not None and max_width != float('inf') and - style['font_size']): - if max_width == 0: - # Trying to find minimum size, let's naively split on spaces and - # keep one word + one letter - space_index = text.find(' ') - if space_index == -1: - expected_length = len(text) - else: - expected_length = space_index + 2 # index + space + one letter - else: - expected_length = int(max_width / style['font_size'] * 2.5) - if expected_length < len(text): - # Try to use a small amount of text instead of the whole text - layout = create_layout( - text[:expected_length], style, context, max_width, - justification_spacing) - first_line, index = layout.get_first_line() - if index is None: - # The small amount of text fits in one line, give up and use - # the whole text - layout = None - if layout is None: - layout = create_layout( - text, style, context, original_max_width, justification_spacing) - first_line, index = layout.get_first_line() - resume_at = index - - # Step #2: Don't split lines when it's not needed - if max_width is None: - # The first line can take all the place needed - return first_line_metrics( - first_line, text, layout, resume_at, space_collapse, style) - first_line_width, _ = get_size(first_line, style) - if index is None and first_line_width <= max_width: - # The first line fits in the available width - return first_line_metrics( - first_line, text, layout, resume_at, space_collapse, style) - - # Step #3: Try to put the first word of 
the second line on the first line - # https://mail.gnome.org/archives/gtk-i18n-list/2013-September/msg00006 - # is a good thread related to this problem. - first_line_text = utf8_slice(text, slice(index)) - # We can’t rely on first_line_width, see - # https://github.com/Kozea/WeasyPrint/issues/1051 - first_line_fits = ( - first_line_width <= max_width or - ' ' in first_line_text.strip() or - can_break_text(first_line_text.strip(), style['lang'])) - if first_line_fits: - # The first line fits but may have been cut too early by Pango - second_line_text = utf8_slice(text, slice(index, None)) - else: - # The line can't be split earlier, try to hyphenate the first word. - first_line_text = '' - second_line_text = text - - next_word = second_line_text.split(' ', 1)[0] - if next_word: - if space_collapse: - # next_word might fit without a space afterwards - # only try when space collapsing is allowed - new_first_line_text = first_line_text + next_word - layout.set_text(new_first_line_text) - first_line, index = layout.get_first_line() - first_line_width, _ = get_size(first_line, style) - if index is None and first_line_text: - # The next word fits in the first line, keep the layout - resume_at = len(new_first_line_text.encode('utf-8')) + 1 - return first_line_metrics( - first_line, text, layout, resume_at, space_collapse, style) - elif index: - # Text may have been split elsewhere by Pango earlier - resume_at = index - else: - # Second line is none - resume_at = first_line.length + 1 - if resume_at >= len(text.encode('utf-8')): - resume_at = None - elif first_line_text: - # We found something on the first line but we did not find a word on - # the next line, no need to hyphenate, we can keep the current layout - return first_line_metrics( - first_line, text, layout, resume_at, space_collapse, style) - - # Step #4: Try to hyphenate - hyphens = style['hyphens'] - lang = style['lang'] and pyphen.language_fallback(style['lang']) - total, left, right = 
style['hyphenate_limit_chars'] - hyphenated = False - soft_hyphen = '\u00ad' - - try_hyphenate = False - if hyphens != 'none': - next_word_boundaries = get_next_word_boundaries(second_line_text, lang) - if next_word_boundaries: - # We have a word to hyphenate - start_word, stop_word = next_word_boundaries - next_word = second_line_text[start_word:stop_word] - if stop_word - start_word >= total: - # This word is long enough - first_line_width, _ = get_size(first_line, style) - space = max_width - first_line_width - if style['hyphenate_limit_zone'].unit == '%': - limit_zone = ( - max_width * style['hyphenate_limit_zone'].value / 100.) - else: - limit_zone = style['hyphenate_limit_zone'].value - if space > limit_zone or space < 0: - # Available space is worth the try, or the line is even too - # long to fit: try to hyphenate - try_hyphenate = True - - if try_hyphenate: - # Automatic hyphenation possible and next word is long enough - auto_hyphenation = hyphens == 'auto' and lang - manual_hyphenation = False - if auto_hyphenation: - if soft_hyphen in first_line_text or soft_hyphen in next_word: - # Automatic hyphenation opportunities within a word must be - # ignored if the word contains a conditional hyphen, in favor - # of the conditional hyphen(s). 
- # See https://drafts.csswg.org/css-text-3/#valdef-hyphens-auto - manual_hyphenation = True - else: - manual_hyphenation = hyphens == 'manual' - - if manual_hyphenation: - # Manual hyphenation: check that the line ends with a soft - # hyphen and add the missing hyphen - if first_line_text.endswith(soft_hyphen): - # The first line has been split on a soft hyphen - if ' ' in first_line_text: - first_line_text, next_word = ( - first_line_text.rsplit(' ', 1)) - next_word = ' ' + next_word - layout.set_text(first_line_text) - first_line, index = layout.get_first_line() - resume_at = len((first_line_text + ' ').encode('utf8')) - else: - first_line_text, next_word = '', first_line_text - soft_hyphen_indexes = [ - match.start() for match in re.finditer(soft_hyphen, next_word)] - soft_hyphen_indexes.reverse() - dictionary_iterations = [ - next_word[:i + 1] for i in soft_hyphen_indexes] - elif auto_hyphenation: - dictionary_key = (lang, left, right, total) - dictionary = context.dictionaries.get(dictionary_key) - if dictionary is None: - dictionary = pyphen.Pyphen(lang=lang, left=left, right=right) - context.dictionaries[dictionary_key] = dictionary - dictionary_iterations = [ - start for start, end in dictionary.iterate(next_word)] - else: - dictionary_iterations = [] - - if dictionary_iterations: - for first_word_part in dictionary_iterations: - new_first_line_text = ( - first_line_text + - second_line_text[:start_word] + - first_word_part) - hyphenated_first_line_text = ( - new_first_line_text + style['hyphenate_character']) - new_layout = create_layout( - hyphenated_first_line_text, style, context, max_width, - justification_spacing) - new_first_line, new_index = new_layout.get_first_line() - new_first_line_width, _ = get_size(new_first_line, style) - new_space = max_width - new_first_line_width - if new_index is None and ( - new_space >= 0 or - first_word_part == dictionary_iterations[-1]): - hyphenated = True - layout = new_layout - first_line = new_first_line - index 
= new_index - resume_at = len(new_first_line_text.encode('utf8')) - if text[len(new_first_line_text)] == soft_hyphen: - # Recreate the layout with no max_width to be sure that - # we don't break before the soft hyphen - pango.pango_layout_set_width( - layout.layout, units_from_double(-1)) - resume_at += len(soft_hyphen.encode('utf8')) - break - - if not hyphenated and not first_line_text: - # Recreate the layout with no max_width to be sure that - # we don't break before or inside the hyphenate character - hyphenated = True - layout.set_text(hyphenated_first_line_text) - pango.pango_layout_set_width( - layout.layout, units_from_double(-1)) - first_line, index = layout.get_first_line() - resume_at = len(new_first_line_text.encode('utf8')) - if text[len(first_line_text)] == soft_hyphen: - resume_at += len(soft_hyphen.encode('utf8')) - - if not hyphenated and first_line_text.endswith(soft_hyphen): - # Recreate the layout with no max_width to be sure that - # we don't break inside the hyphenate-character string - hyphenated = True - hyphenated_first_line_text = ( - first_line_text + style['hyphenate_character']) - layout.set_text(hyphenated_first_line_text) - pango.pango_layout_set_width( - layout.layout, units_from_double(-1)) - first_line, index = layout.get_first_line() - resume_at = len(first_line_text.encode('utf8')) - - # Step 5: Try to break word if it's too long for the line - overflow_wrap = style['overflow_wrap'] - first_line_width, _ = get_size(first_line, style) - space = max_width - first_line_width - # If we can break words and the first line is too long - if not minimum and overflow_wrap == 'break-word' and space < 0: - # Is it really OK to remove hyphenation for word-break ? 
- hyphenated = False - # TODO: Modify code to preserve W3C condition: - # "Shaping characters are still shaped as if the word were not broken" - # The way new lines are processed in this function (one by one with no - # memory of the last) prevents shaping characters (arabic, for - # instance) from keeping their shape when wrapped on the next line with - # pango layout. Maybe insert Unicode shaping characters in text? - layout.set_text(text) - pango.pango_layout_set_width( - layout.layout, units_from_double(max_width)) - layout.set_wrap(PANGO_WRAP_MODE['WRAP_CHAR']) - first_line, index = layout.get_first_line() - resume_at = index or first_line.length - if resume_at >= len(text.encode('utf-8')): - resume_at = None - - return first_line_metrics( - first_line, text, layout, resume_at, space_collapse, style, hyphenated, - style['hyphenate_character']) - - -def show_first_line(context, textbox, text_overflow, x, y): - """Draw the given ``textbox`` line to the document ``context``.""" - pango.pango_layout_set_single_paragraph_mode( - textbox.pango_layout.layout, True) - - if text_overflow == 'ellipsis': - assert textbox.pango_layout.max_width is not None - max_width = textbox.pango_layout.max_width - pango.pango_layout_set_width( - textbox.pango_layout.layout, units_from_double(max_width)) - pango.pango_layout_set_ellipsize( - textbox.pango_layout.layout, pango.PANGO_ELLIPSIZE_END) - - first_line, _ = textbox.pango_layout.get_first_line() - - font_size = textbox.style['font_size'] - utf8_text = textbox.text.encode('utf-8') - previous_utf8_position = 0 - - runs = [first_line.runs[0]] - while runs[-1].next != ffi.NULL: - runs.append(runs[-1].next) - - context.text_matrix(font_size, 0, 0, -font_size, x, y) - last_font = None - string = '' - for run in runs: - # Pango objects - glyph_item = ffi.cast('PangoGlyphItem *', run.data) - glyph_string = glyph_item.glyphs - glyphs = glyph_string.glyphs - num_glyphs = glyph_string.num_glyphs - offset = glyph_item.item.offset - 
clusters = glyph_string.log_clusters - - # Font content - pango_font = glyph_item.item.analysis.font - hb_font = pango.pango_font_get_hb_font(pango_font) - font_hash = hb_face = harfbuzz.hb_font_get_face(hb_font) - fonts = context.get_fonts() - if font_hash in fonts: - font = fonts[font_hash] - else: - hb_blob = harfbuzz.hb_face_reference_blob(hb_face) - hb_data = harfbuzz.hb_blob_get_data(hb_blob, context.length) - file_content = ffi.unpack(hb_data, int(context.length[0])) - font = context.add_font(font_hash, file_content, pango_font) - - # Positions of the glyphs in the UTF-8 string - utf8_positions = [offset + clusters[i] for i in range(1, num_glyphs)] - utf8_positions.append(offset + glyph_item.item.length) - - # Go through the run glyphs - if font != last_font: - if string: - context.show_text(string) - string = '' - last_font = font - context.set_font_size(font.hash, 1) - string += '<' - for i in range(num_glyphs): - glyph = glyphs[i].glyph - width = glyphs[i].geometry.width - utf8_position = utf8_positions[i] - string += f'{glyph:04x}' - - # Ink bounding box and logical widths in font - if glyph not in font.widths: - pango.pango_font_get_glyph_extents( - pango_font, glyph, context.ink_rect, context.logical_rect) - x1, y1, x2, y2 = ( - context.ink_rect.x, - -context.ink_rect.y - context.ink_rect.height, - context.ink_rect.x + context.ink_rect.width, - -context.ink_rect.y) - if x1 < font.bbox[0]: - font.bbox[0] = int(units_to_double(x1 * 1000) / font_size) - if y1 < font.bbox[1]: - font.bbox[1] = int(units_to_double(y1 * 1000) / font_size) - if x2 > font.bbox[2]: - font.bbox[2] = int(units_to_double(x2 * 1000) / font_size) - if y2 > font.bbox[3]: - font.bbox[3] = int(units_to_double(y2 * 1000) / font_size) - font.widths[glyph] = int( - units_to_double(context.logical_rect.width * 1000) / - font_size) - - # Kerning - kerning = int( - font.widths[glyph] - units_to_double(width * 1000) / font_size) - if kerning: - string += f'>{kerning}<' - - # Mapping between 
glyphs and characters - if glyph not in font.cmap and glyph != pango.PANGO_GLYPH_EMPTY: - utf8_slice = slice(previous_utf8_position, utf8_position) - font.cmap[glyph] = utf8_text[utf8_slice].decode('utf-8') - previous_utf8_position = utf8_position - - # Close the last glyphs list, remove if empty - if string[-1] == '<': - string = string.rsplit('>', 1)[0] - string += '>' - - # Draw text - context.show_text(string) - - -def get_log_attrs(text, lang): - if lang: - lang_p, lang = unicode_to_char_p(lang) - else: - lang = None - language = pango.pango_language_get_default() - if lang: - language = pango.pango_language_from_string(lang_p) - # TODO: this should be removed when bidi is supported - for char in ('\u202a', '\u202b', '\u202c', '\u202d', '\u202e'): - text = text.replace(char, '') - text_p, bytestring = unicode_to_char_p(text) - length = len(text) + 1 - log_attrs = ffi.new('PangoLogAttr[]', length) - pango.pango_get_log_attrs( - text_p, len(bytestring), -1, language, log_attrs, length) - return bytestring, log_attrs - - -def can_break_text(text, lang): - if not text or len(text) < 2: - return None - bytestring, log_attrs = get_log_attrs(text, lang) - length = len(text) + 1 - return any(attr.is_line_break for attr in log_attrs[1:length - 1]) - - -def get_next_word_boundaries(text, lang): - if not text or len(text) < 2: - return None - bytestring, log_attrs = get_log_attrs(text, lang) - for i, attr in enumerate(log_attrs): - if attr.is_word_end: - word_end = i - break - if attr.is_word_boundary: - word_start = i - else: - return None - return word_start, word_end diff --git a/weasyprint/text/constants.py b/weasyprint/text/constants.py new file mode 100644 index 00000000..7f5e1467 --- /dev/null +++ b/weasyprint/text/constants.py @@ -0,0 +1,381 @@ +""" + weasyprint.text.constants + ------------------------- + + Constants used for text layout. 
+ +""" + +from .ffi import pango + +# Pango features +PANGO_STYLE = { + 'normal': pango.PANGO_STYLE_NORMAL, + 'oblique': pango.PANGO_STYLE_OBLIQUE, + 'italic': pango.PANGO_STYLE_ITALIC, +} +PANGO_STRETCH = { + 'ultra-condensed': pango.PANGO_STRETCH_ULTRA_CONDENSED, + 'extra-condensed': pango.PANGO_STRETCH_EXTRA_CONDENSED, + 'condensed': pango.PANGO_STRETCH_CONDENSED, + 'semi-condensed': pango.PANGO_STRETCH_SEMI_CONDENSED, + 'normal': pango.PANGO_STRETCH_NORMAL, + 'semi-expanded': pango.PANGO_STRETCH_SEMI_EXPANDED, + 'expanded': pango.PANGO_STRETCH_EXPANDED, + 'extra-expanded': pango.PANGO_STRETCH_EXTRA_EXPANDED, + 'ultra-expanded': pango.PANGO_STRETCH_ULTRA_EXPANDED, +} +PANGO_WRAP_MODE = { + 'WRAP_WORD': pango.PANGO_WRAP_WORD, + 'WRAP_CHAR': pango.PANGO_WRAP_CHAR, + 'WRAP_WORD_CHAR': pango.PANGO_WRAP_WORD_CHAR +} + +# Language system tags +# From https://docs.microsoft.com/typography/opentype/spec/languagetags +LST_TO_ISO = { + 'aba': 'abq', + 'afk': 'afr', + 'afr': 'aar', + 'agw': 'ahg', + 'als': 'gsw', + 'alt': 'atv', + 'ari': 'aiw', + 'ark': 'mhv', + 'ath': 'apk', + 'avr': 'ava', + 'bad': 'bfq', + 'bad0': 'bad', + 'bag': 'bfy', + 'bal': 'krc', + 'bau': 'bci', + 'bch': 'bcq', + 'bgr': 'bul', + 'bil': 'byn', + 'bkf': 'bla', + 'bli': 'bal', + 'bln': 'bjt', + 'blt': 'bft', + 'bmb': 'bam', + 'bri': 'bra', + 'brm': 'mya', + 'bsh': 'bak', + 'bti': 'btb', + 'chg': 'sgw', + 'chh': 'hne', + 'chi': 'nya', + 'chk': 'ckt', + 'chk0': 'chk', + 'chu': 'chv', + 'chy': 'chy', + 'cmr': 'swb', + 'crr': 'crx', + 'crt': 'crh', + 'csl': 'chu', + 'csy': 'ces', + 'dcr': 'cwd', + 'dgr': 'doi', + 'djr': 'dje', + 'djr0': 'djr', + 'dng': 'ada', + 'dnk': 'din', + 'dri': 'prs', + 'dun': 'dng', + 'dzn': 'dzo', + 'ebi': 'igb', + 'ecr': 'crj', + 'edo': 'bin', + 'erz': 'myv', + 'esp': 'spa', + 'eti': 'est', + 'euq': 'eus', + 'evk': 'evn', + 'evn': 'eve', + 'fan': 'acf', + 'fan0': 'fan', + 'far': 'fas', + 'fji': 'fij', + 'fle': 'vls', + 'fne': 'enf', + 'fos': 'fao', + 'fri': 'fry', + 'frl': 
'fur', + 'frp': 'frp', + 'fta': 'fuf', + 'gad': 'gaa', + 'gae': 'gla', + 'gal': 'glg', + 'gaw': 'gbm', + 'gil': 'niv', + 'gil0': 'gil', + 'gmz': 'guk', + 'grn': 'kal', + 'gro': 'grt', + 'gua': 'grn', + 'hai': 'hat', + 'hal': 'flm', + 'har': 'hoj', + 'hbn': 'amf', + 'hma': 'mrj', + 'hnd': 'hno', + 'ho': 'hoc', + 'hri': 'har', + 'hye0': 'hye', + 'ijo': 'ijc', + 'ing': 'inh', + 'inu': 'iku', + 'iri': 'gle', + 'irt': 'gle', + 'ism': 'smn', + 'iwr': 'heb', + 'jan': 'jpn', + 'jii': 'yid', + 'jud': 'lad', + 'jul': 'dyu', + 'kab': 'kbd', + 'kab0': 'kab', + 'kac': 'kfr', + 'kal': 'kln', + 'kar': 'krc', + 'keb': 'ktb', + 'kge': 'kat', + 'kha': 'kjh', + 'khk': 'kca', + 'khs': 'kca', + 'khv': 'kca', + 'kis': 'kqs', + 'kkn': 'kex', + 'klm': 'xal', + 'kmb': 'kam', + 'kmn': 'kfy', + 'kmo': 'kmw', + 'kms': 'kxc', + 'knr': 'kau', + 'kod': 'kfa', + 'koh': 'okm', + 'kon': 'ktu', + 'kon0': 'kon', + 'kop': 'koi', + 'koz': 'kpv', + 'kpl': 'kpe', + 'krk': 'kaa', + 'krm': 'kdr', + 'krn': 'kar', + 'krt': 'kqy', + 'ksh': 'kas', + 'ksh0': 'ksh', + 'ksi': 'kha', + 'ksm': 'sjd', + 'kui': 'kxu', + 'kul': 'kfx', + 'kuu': 'kru', + 'kuy': 'kdt', + 'kyk': 'kpy', + 'lad': 'lld', + 'lah': 'bfu', + 'lak': 'lbe', + 'lam': 'lmn', + 'laz': 'lzz', + 'lcr': 'crm', + 'ldk': 'lbj', + 'lma': 'mhr', + 'lmb': 'lif', + 'lmw': 'ngl', + 'lsb': 'dsb', + 'lsm': 'smj', + 'lth': 'lit', + 'luh': 'luy', + 'lvi': 'lav', + 'maj': 'mpe', + 'mak': 'vmw', + 'man': 'mns', + 'map': 'arn', + 'maw': 'mwr', + 'mbn': 'kmb', + 'mch': 'mnc', + 'mcr': 'crm', + 'mde': 'men', + 'men': 'mym', + 'miz': 'lus', + 'mkr': 'mak', + 'mle': 'mdy', + 'mln': 'mlq', + 'mlr': 'mal', + 'mly': 'msa', + 'mnd': 'mnk', + 'mng': 'mon', + 'mnk': 'man', + 'mnx': 'glv', + 'mok': 'mdf', + 'mon': 'mnw', + 'mth': 'mai', + 'mts': 'mlt', + 'mun': 'unr', + 'nan': 'gld', + 'nas': 'nsk', + 'ncr': 'csw', + 'ndg': 'ndo', + 'nhc': 'csw', + 'nis': 'dap', + 'nkl': 'nyn', + 'nko': 'nqo', + 'nor': 'nob', + 'nsm': 'sme', + 'nta': 'nod', + 'nto': 'epo', + 'nyn': 'nno', + 
'ocr': 'ojs', + 'ojb': 'oji', + 'oro': 'orm', + 'paa': 'sam', + 'pal': 'pli', + 'pap': 'plp', + 'pap0': 'pap', + 'pas': 'pus', + 'pgr': 'ell', + 'pil': 'fil', + 'plg': 'pce', + 'plk': 'pol', + 'ptg': 'por', + 'qin': 'bgr', + 'rbu': 'bxr', + 'rcr': 'atj', + 'rms': 'roh', + 'rom': 'ron', + 'roy': 'rom', + 'rsy': 'rue', + 'rua': 'kin', + 'sad': 'sck', + 'say': 'chp', + 'sek': 'xan', + 'sel': 'sel', + 'sgo': 'sag', + 'sgs': 'sgs', + 'sib': 'sjo', + 'sig': 'xst', + 'sks': 'sms', + 'sky': 'slk', + 'sla': 'scs', + 'sml': 'som', + 'sna': 'seh', + 'sna0': 'sna', + 'snh': 'sin', + 'sog': 'gru', + 'srb': 'srp', + 'ssl': 'xsl', + 'ssm': 'sma', + 'sur': 'suq', + 'sve': 'swe', + 'swa': 'aii', + 'swk': 'swa', + 'swz': 'ssw', + 'sxt': 'ngo', + 'taj': 'tgk', + 'tcr': 'cwd', + 'tgn': 'ton', + 'tgr': 'tig', + 'tgy': 'tir', + 'tht': 'tah', + 'tib': 'bod', + 'tkm': 'tuk', + 'tmn': 'tem', + 'tna': 'tsn', + 'tne': 'enh', + 'tng': 'toi', + 'tod': 'xal', + 'tod0': 'tod', + 'trk': 'tur', + 'tsg': 'tso', + 'tua': 'tru', + 'tul': 'tcy', + 'tuv': 'tyv', + 'twi': 'aka', + 'usb': 'hsb', + 'uyg': 'uig', + 'vit': 'vie', + 'vro': 'vro', + 'wa': 'wbm', + 'wag': 'wbr', + 'wcr': 'crk', + 'wel': 'cym', + 'wlf': 'wol', + 'xbd': 'khb', + 'xhs': 'xho', + 'yak': 'sah', + 'yba': 'yor', + 'ycr': 'cre', + 'yim': 'iii', + 'zhh': 'zho', + 'zhp': 'zho', + 'zhs': 'zho', + 'zht': 'zho', + 'znd': 'zne', +} + +# Font features +LIGATURE_KEYS = { + 'common-ligatures': ['liga', 'clig'], + 'historical-ligatures': ['hlig'], + 'discretionary-ligatures': ['dlig'], + 'contextual': ['calt'], +} +CAPS_KEYS = { + 'small-caps': ['smcp'], + 'all-small-caps': ['c2sc', 'smcp'], + 'petite-caps': ['pcap'], + 'all-petite-caps': ['c2pc', 'pcap'], + 'unicase': ['unic'], + 'titling-caps': ['titl'], +} +NUMERIC_KEYS = { + 'lining-nums': 'lnum', + 'oldstyle-nums': 'onum', + 'proportional-nums': 'pnum', + 'tabular-nums': 'tnum', + 'diagonal-fractions': 'frac', + 'stacked-fractions': 'afrc', + 'ordinal': 'ordn', + 'slashed-zero': 'zero', +} 
+EAST_ASIAN_KEYS = { + 'jis78': 'jp78', + 'jis83': 'jp83', + 'jis90': 'jp90', + 'jis04': 'jp04', + 'simplified': 'smpl', + 'traditional': 'trad', + 'full-width': 'fwid', + 'proportional-width': 'pwid', + 'ruby': 'ruby', +} + +# Fontconfig features +FONTCONFIG_WEIGHT = { + 'normal': 'normal', + 'bold': 'bold', + 100: 'thin', + 200: 'extralight', + 300: 'light', + 400: 'normal', + 500: 'medium', + 600: 'demibold', + 700: 'bold', + 800: 'extrabold', + 900: 'black', +} +FONTCONFIG_STYLE = { + 'normal': 'roman', + 'italic': 'italic', + 'oblique': 'oblique', +} +FONTCONFIG_STRETCH = { + 'normal': 'normal', + 'ultra-condensed': 'ultracondensed', + 'extra-condensed': 'extracondensed', + 'condensed': 'condensed', + 'semi-condensed': 'semicondensed', + 'semi-expanded': 'semiexpanded', + 'expanded': 'expanded', + 'extra-expanded': 'extraexpanded', + 'ultra-expanded': 'ultraexpanded', +} diff --git a/weasyprint/text/ffi.py b/weasyprint/text/ffi.py new file mode 100644 index 00000000..32231f20 --- /dev/null +++ b/weasyprint/text/ffi.py @@ -0,0 +1,407 @@ +""" + weasyprint.text.ffi + ------------------- + + Imports of dynamic libraries used for text layout. + +""" + +import cffi + +ffi = cffi.FFI() +ffi.cdef(''' + // HarfBuzz + + typedef ... hb_font_t; + typedef ... hb_face_t; + typedef ... hb_blob_t; + hb_face_t * hb_font_get_face (hb_font_t *font); + hb_blob_t * hb_face_reference_blob (hb_face_t *face); + const char * hb_blob_get_data (hb_blob_t *blob, unsigned int *length); + + // Pango + + typedef unsigned int guint; + typedef int gint; + typedef char gchar; + typedef gint gboolean; + typedef void* gpointer; + typedef ... PangoLayout; + typedef ... PangoContext; + typedef ... PangoFontMap; + typedef ... PangoFontMetrics; + typedef ... PangoLanguage; + typedef ... PangoTabArray; + typedef ... PangoFontDescription; + typedef ... PangoLayoutIter; + typedef ... PangoAttrList; + typedef ... PangoAttrClass; + typedef ... 
PangoFont; + typedef guint PangoGlyph; + typedef gint PangoGlyphUnit; + + const guint PANGO_GLYPH_EMPTY = 0x0FFFFFFF; + + typedef enum { + PANGO_STYLE_NORMAL, + PANGO_STYLE_OBLIQUE, + PANGO_STYLE_ITALIC + } PangoStyle; + + typedef enum { + PANGO_WEIGHT_THIN = 100, + PANGO_WEIGHT_ULTRALIGHT = 200, + PANGO_WEIGHT_LIGHT = 300, + PANGO_WEIGHT_BOOK = 380, + PANGO_WEIGHT_NORMAL = 400, + PANGO_WEIGHT_MEDIUM = 500, + PANGO_WEIGHT_SEMIBOLD = 600, + PANGO_WEIGHT_BOLD = 700, + PANGO_WEIGHT_ULTRABOLD = 800, + PANGO_WEIGHT_HEAVY = 900, + PANGO_WEIGHT_ULTRAHEAVY = 1000 + } PangoWeight; + + typedef enum { + PANGO_STRETCH_ULTRA_CONDENSED, + PANGO_STRETCH_EXTRA_CONDENSED, + PANGO_STRETCH_CONDENSED, + PANGO_STRETCH_SEMI_CONDENSED, + PANGO_STRETCH_NORMAL, + PANGO_STRETCH_SEMI_EXPANDED, + PANGO_STRETCH_EXPANDED, + PANGO_STRETCH_EXTRA_EXPANDED, + PANGO_STRETCH_ULTRA_EXPANDED + } PangoStretch; + + typedef enum { + PANGO_WRAP_WORD, + PANGO_WRAP_CHAR, + PANGO_WRAP_WORD_CHAR + } PangoWrapMode; + + typedef enum { + PANGO_TAB_LEFT + } PangoTabAlign; + + typedef enum { + PANGO_ELLIPSIZE_NONE, + PANGO_ELLIPSIZE_START, + PANGO_ELLIPSIZE_MIDDLE, + PANGO_ELLIPSIZE_END + } PangoEllipsizeMode; + + typedef struct GSList { + gpointer data; + struct GSList *next; + } GSList; + + typedef struct { + const PangoAttrClass *klass; + guint start_index; + guint end_index; + } PangoAttribute; + + typedef struct { + PangoLayout *layout; + gint start_index; + gint length; + GSList *runs; + guint is_paragraph_start : 1; + guint resolved_dir : 3; + } PangoLayoutLine; + + typedef struct { + int x; + int y; + int width; + int height; + } PangoRectangle; + + typedef struct { + guint is_line_break: 1; + guint is_mandatory_break : 1; + guint is_char_break : 1; + guint is_white : 1; + guint is_cursor_position : 1; + guint is_word_start : 1; + guint is_word_end : 1; + guint is_sentence_boundary : 1; + guint is_sentence_start : 1; + guint is_sentence_end : 1; + guint backspace_deletes_character : 1; + guint 
is_expandable_space : 1; + guint is_word_boundary : 1; + } PangoLogAttr; + + typedef struct { + void *shape_engine; + void *lang_engine; + PangoFont *font; + guint level; + guint gravity; + guint flags; + guint script; + PangoLanguage *language; + GSList *extra_attrs; + } PangoAnalysis; + + typedef struct { + gint offset; + gint length; + gint num_chars; + PangoAnalysis analysis; + } PangoItem; + + typedef struct { + PangoGlyphUnit width; + PangoGlyphUnit x_offset; + PangoGlyphUnit y_offset; + } PangoGlyphGeometry; + + typedef struct { + guint is_cluster_start : 1; + } PangoGlyphVisAttr; + + typedef struct { + PangoGlyph glyph; + PangoGlyphGeometry geometry; + PangoGlyphVisAttr attr; + } PangoGlyphInfo; + + typedef struct { + gint num_glyphs; + PangoGlyphInfo *glyphs; + gint *log_clusters; + } PangoGlyphString; + + typedef struct { + PangoItem *item; + PangoGlyphString *glyphs; + } PangoGlyphItem; + + int pango_version (void); + + double pango_units_to_double (int i); + int pango_units_from_double (double d); + void g_object_unref (gpointer object); + void g_type_init (void); + + PangoLayout * pango_layout_new (PangoContext *context); + void pango_layout_set_width (PangoLayout *layout, int width); + PangoAttrList * pango_layout_get_attributes(PangoLayout *layout); + void pango_layout_set_attributes ( + PangoLayout *layout, PangoAttrList *attrs); + void pango_layout_set_text ( + PangoLayout *layout, const char *text, int length); + void pango_layout_set_tabs ( + PangoLayout *layout, PangoTabArray *tabs); + void pango_layout_set_font_description ( + PangoLayout *layout, const PangoFontDescription *desc); + void pango_layout_set_wrap ( + PangoLayout *layout, PangoWrapMode wrap); + void pango_layout_set_single_paragraph_mode ( + PangoLayout *layout, gboolean setting); + int pango_layout_get_baseline (PangoLayout *layout); + PangoLayoutLine * pango_layout_get_line_readonly ( + PangoLayout *layout, int line); + + hb_font_t * pango_font_get_hb_font (PangoFont *font); + + 
PangoFontDescription * pango_font_description_new (void); + void pango_font_description_free (PangoFontDescription *desc); + PangoFontDescription * pango_font_description_copy ( + const PangoFontDescription *desc); + void pango_font_description_set_family ( + PangoFontDescription *desc, const char *family); + void pango_font_description_set_style ( + PangoFontDescription *desc, PangoStyle style); + PangoStyle pango_font_description_get_style ( + const PangoFontDescription *desc); + void pango_font_description_set_stretch ( + PangoFontDescription *desc, PangoStretch stretch); + void pango_font_description_set_weight ( + PangoFontDescription *desc, PangoWeight weight); + void pango_font_description_set_absolute_size ( + PangoFontDescription *desc, double size); + int pango_font_description_get_size (PangoFontDescription *desc); + + int pango_glyph_string_get_width (PangoGlyphString *glyphs); + char * pango_font_description_to_string ( + const PangoFontDescription *desc); + + PangoFontDescription * pango_font_describe (PangoFont *font); + const char * pango_font_description_get_family ( + const PangoFontDescription *desc); + int pango_font_description_hash (const PangoFontDescription *desc); + + PangoContext * pango_context_new (); + PangoContext * pango_font_map_create_context (PangoFontMap *fontmap); + + PangoFontMetrics * pango_context_get_metrics ( + PangoContext *context, const PangoFontDescription *desc, + PangoLanguage *language); + void pango_font_metrics_unref (PangoFontMetrics *metrics); + int pango_font_metrics_get_ascent (PangoFontMetrics *metrics); + int pango_font_metrics_get_descent (PangoFontMetrics *metrics); + int pango_font_metrics_get_underline_thickness ( + PangoFontMetrics *metrics); + int pango_font_metrics_get_underline_position ( + PangoFontMetrics *metrics); + int pango_font_metrics_get_strikethrough_thickness ( + PangoFontMetrics *metrics); + int pango_font_metrics_get_strikethrough_position ( + PangoFontMetrics *metrics); + + void 
pango_context_set_round_glyph_positions ( + PangoContext *context, gboolean round_positions); + + PangoFontMetrics * pango_font_get_metrics ( + PangoFont *font, PangoLanguage *language); + + void pango_font_get_glyph_extents ( + PangoFont *font, PangoGlyph glyph, PangoRectangle *ink_rect, + PangoRectangle *logical_rect); + + PangoAttrList * pango_attr_list_new (void); + void pango_attr_list_unref (PangoAttrList *list); + void pango_attr_list_insert ( + PangoAttrList *list, PangoAttribute *attr); + void pango_attr_list_change ( + PangoAttrList *list, PangoAttribute *attr); + PangoAttribute * pango_attr_font_features_new (const gchar *features); + PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing); + void pango_attribute_destroy (PangoAttribute *attr); + + PangoTabArray * pango_tab_array_new_with_positions ( + gint size, gboolean positions_in_pixels, PangoTabAlign first_alignment, + gint first_position, ...); + void pango_tab_array_free (PangoTabArray *tab_array); + + PangoLanguage * pango_language_from_string (const char *language); + PangoLanguage * pango_language_get_default (void); + void pango_context_set_language ( + PangoContext *context, PangoLanguage *language); + void pango_context_set_font_map ( + PangoContext *context, PangoFontMap *font_map); + + void pango_layout_line_get_extents ( + PangoLayoutLine *line, + PangoRectangle *ink_rect, PangoRectangle *logical_rect); + + PangoContext * pango_layout_get_context (PangoLayout *layout); + void pango_layout_set_ellipsize ( + PangoLayout *layout, + PangoEllipsizeMode ellipsize); + + void pango_get_log_attrs ( + const char *text, int length, int level, PangoLanguage *language, + PangoLogAttr *log_attrs, int attrs_len); + + + // FontConfig + + typedef int FcBool; + typedef struct _FcConfig FcConfig; + typedef struct _FcPattern FcPattern; + typedef struct _FcStrList FcStrList; + typedef unsigned char FcChar8; + + typedef enum { + FcResultMatch, FcResultNoMatch, FcResultTypeMismatch, FcResultNoId, + 
FcResultOutOfMemory + } FcResult; + + typedef enum { + FcMatchPattern, FcMatchFont, FcMatchScan + } FcMatchKind; + + typedef struct _FcFontSet { + int nfont; + int sfont; + FcPattern **fonts; + } FcFontSet; + + typedef enum _FcSetName { + FcSetSystem = 0, + FcSetApplication = 1 + } FcSetName; + + FcConfig * FcInitLoadConfigAndFonts (void); + void FcConfigDestroy (FcConfig *config); + FcBool FcConfigAppFontAddFile ( + FcConfig *config, const FcChar8 *file); + FcConfig * FcConfigGetCurrent (void); + FcBool FcConfigSetCurrent (FcConfig *config); + FcBool FcConfigParseAndLoad ( + FcConfig *config, const FcChar8 *file, FcBool complain); + + FcFontSet * FcConfigGetFonts(FcConfig *config, FcSetName set); + FcStrList * FcConfigGetConfigFiles(FcConfig *config); + FcChar8 * FcStrListNext(FcStrList *list); + + void FcDefaultSubstitute (FcPattern *pattern); + FcBool FcConfigSubstitute ( + FcConfig *config, FcPattern *p, FcMatchKind kind); + + FcPattern * FcPatternCreate (void); + FcPattern * FcPatternDestroy (FcPattern *p); + FcBool FcPatternAddString ( + FcPattern *p, const char *object, const FcChar8 *s); + FcResult FcPatternGetString ( + FcPattern *p, const char *object, int n, FcChar8 **s); + FcPattern * FcFontMatch ( + FcConfig *config, FcPattern *p, FcResult *result); + + + // PangoFT2 + + typedef ... PangoFcFontMap; + + PangoFontMap * pango_ft2_font_map_new (void); + void pango_fc_font_map_set_config ( + PangoFcFontMap *fcfontmap, FcConfig *fcconfig); +''') + + +def _dlopen(ffi, *names): + """Try various names for the same library, for different platforms.""" + for name in names: + try: + return ffi.dlopen(name) + except OSError: + pass + # Re-raise the exception. 
+ return ffi.dlopen(names[0]) # pragma: no cover + + +gobject = _dlopen( + ffi, 'gobject-2.0-0', 'gobject-2.0', 'libgobject-2.0-0', + 'libgobject-2.0.so.0', 'libgobject-2.0.dylib') +pango = _dlopen( + ffi, 'pango-1.0-0', 'pango-1.0', 'libpango-1.0-0', 'libpango-1.0.so.0', + 'libpango-1.0.dylib') +harfbuzz = _dlopen( + ffi, 'harfbuzz', 'harfbuzz-0.0', 'libharfbuzz-0', + 'libharfbuzz.so.0', 'libharfbuzz.so.0', 'libharfbuzz.0.dylib') +fontconfig = _dlopen( + ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig-1.dll', + 'libfontconfig.so.1', 'libfontconfig-1.dylib') +pangoft2 = _dlopen( + ffi, 'pangoft2-1.0-0', 'pangoft2-1.0', 'libpangoft2-1.0-0', + 'libpangoft2-1.0.so.0', 'libpangoft2-1.0.dylib') + +gobject.g_type_init() + +units_to_double = pango.pango_units_to_double +units_from_double = pango.pango_units_from_double + + +def unicode_to_char_p(string): + """Return ``(pointer, bytestring)``. + + The byte string must live at least as long as the pointer is used. + + """ + bytestring = string.encode('utf-8').replace(b'\x00', b'') + return ffi.new('char[]', bytestring), bytestring diff --git a/weasyprint/fonts.py b/weasyprint/text/fonts.py similarity index 77% rename from weasyprint/fonts.py rename to weasyprint/text/fonts.py index 1febca71..e566b6a1 100644 --- a/weasyprint/fonts.py +++ b/weasyprint/text/fonts.py @@ -1,6 +1,6 @@ """ - weasyprint.fonts - ---------------- + weasyprint.text.fonts + --------------------- Interface with external libraries managing fonts installed on the system. 
@@ -15,115 +15,12 @@ import warnings from fontTools.ttLib import TTFont, woff2 -from .logger import LOGGER -from .text import dlopen, ffi, get_font_features, gobject -from .urls import FILESYSTEM_ENCODING, fetch - -fontconfig = dlopen( - ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig-1.dll', - 'libfontconfig.so.1', 'libfontconfig-1.dylib') -pangoft2 = dlopen( - ffi, 'pangoft2-1.0-0', 'pangoft2-1.0', 'libpangoft2-1.0-0', - 'libpangoft2-1.0.so', 'libpangoft2-1.0.dylib') - -ffi.cdef(''' - // FontConfig - - typedef int FcBool; - typedef struct _FcConfig FcConfig; - typedef struct _FcPattern FcPattern; - typedef struct _FcStrList FcStrList; - typedef unsigned char FcChar8; - - typedef enum { - FcResultMatch, FcResultNoMatch, FcResultTypeMismatch, FcResultNoId, - FcResultOutOfMemory - } FcResult; - - typedef enum { - FcMatchPattern, FcMatchFont, FcMatchScan - } FcMatchKind; - - - typedef struct _FcFontSet { - int nfont; - int sfont; - FcPattern **fonts; - } FcFontSet; - - typedef enum _FcSetName { - FcSetSystem = 0, - FcSetApplication = 1 - } FcSetName; - - FcConfig * FcInitLoadConfigAndFonts (void); - void FcConfigDestroy (FcConfig *config); - FcBool FcConfigAppFontAddFile ( - FcConfig *config, const FcChar8 *file); - FcConfig * FcConfigGetCurrent (void); - FcBool FcConfigSetCurrent (FcConfig *config); - FcBool FcConfigParseAndLoad ( - FcConfig *config, const FcChar8 *file, FcBool complain); - - FcFontSet * FcConfigGetFonts(FcConfig *config, FcSetName set); - FcStrList * FcConfigGetConfigFiles(FcConfig *config); - FcChar8 * FcStrListNext(FcStrList *list); - - void FcDefaultSubstitute (FcPattern *pattern); - FcBool FcConfigSubstitute ( - FcConfig *config, FcPattern *p, FcMatchKind kind); - - FcPattern * FcPatternCreate (void); - FcPattern * FcPatternDestroy (FcPattern *p); - FcBool FcPatternAddString ( - FcPattern *p, const char *object, const FcChar8 *s); - FcResult FcPatternGetString ( - FcPattern *p, const char *object, int n, FcChar8 **s); - 
FcPattern * FcFontMatch ( - FcConfig *config, FcPattern *p, FcResult *result); - - - // PangoFT2 - - typedef ... PangoFcFontMap; - - PangoFontMap * pango_ft2_font_map_new (void); - void pango_fc_font_map_set_config ( - PangoFcFontMap *fcfontmap, FcConfig *fcconfig); - -''') - -FONTCONFIG_WEIGHT_CONSTANTS = { - 'normal': 'normal', - 'bold': 'bold', - 100: 'thin', - 200: 'extralight', - 300: 'light', - 400: 'normal', - 500: 'medium', - 600: 'demibold', - 700: 'bold', - 800: 'extrabold', - 900: 'black', -} - -FONTCONFIG_STYLE_CONSTANTS = { - 'normal': 'roman', - 'italic': 'italic', - 'oblique': 'oblique', -} - -FONTCONFIG_STRETCH_CONSTANTS = { - 'normal': 'normal', - 'ultra-condensed': 'ultracondensed', - 'extra-condensed': 'extracondensed', - 'condensed': 'condensed', - 'semi-condensed': 'semicondensed', - 'semi-expanded': 'semiexpanded', - 'expanded': 'expanded', - 'extra-expanded': 'extraexpanded', - 'ultra-expanded': 'ultraexpanded', -} +from ..logger import LOGGER +from ..urls import FILESYSTEM_ENCODING, fetch +from .constants import ( + CAPS_KEYS, EAST_ASIAN_KEYS, FONTCONFIG_STRETCH, FONTCONFIG_STYLE, + FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS) +from .ffi import ffi, fontconfig, gobject, pangoft2 def _check_font_configuration(font_config): @@ -317,15 +214,14 @@ class FontConfiguration: LOGGER.debug( 'Failed to load font at %r (%s)', url, exc) continue - font_features = { + features = { rules[0][0].replace('-', '_'): rules[0][1] for rules in rule_descriptors.get('font_variant', [])} if 'font_feature_settings' in rule_descriptors: - font_features['font_feature_settings'] = ( + features['font_feature_settings'] = ( rule_descriptors['font_feature_settings']) features_string = '' - for key, value in get_font_features( - **font_features).items(): + for key, value in font_features(**features).items(): features_string += f'{key} {value}' fd = tempfile.NamedTemporaryFile( 'wb', dir=self._tempdir, delete=False) @@ -333,11 +229,11 @@ class FontConfiguration: 
def font_features(font_kerning='normal', font_variant_ligatures='normal',
                  font_variant_position='normal', font_variant_caps='normal',
                  font_variant_numeric='normal',
                  font_variant_alternates='normal',
                  font_variant_east_asian='normal',
                  font_feature_settings='normal'):
    """Build the OpenType feature mapping matching the given style values.

    Return a dict mapping OpenType feature tags to their values, filled in
    the precedence order described in
    https://www.w3.org/TR/css-fonts-3/#feature-precedence

    """
    result = {}

    # Steps 1 to 3 of the precedence list are not handled here: defaults are
    # left to Pango, and the @font-face descriptors (font-variant and
    # font-feature-settings) are handled in fonts.add_font_face.

    # Step 4: font-variant and OpenType features

    if font_kerning != 'auto':
        result['kern'] = 1 if font_kerning == 'normal' else 0

    if font_variant_ligatures == 'none':
        result.update(
            (tag, 0) for tags in LIGATURE_KEYS.values() for tag in tags)
    elif font_variant_ligatures != 'normal':
        for name in font_variant_ligatures:
            enabled = not name.startswith('no-')
            if not enabled:
                # Drop the "no-" prefix to find the ligature family
                name = name[3:]
            state = int(enabled)
            result.update((tag, state) for tag in LIGATURE_KEYS[name])

    # TODO: the specification asks for additional checks
    # https://www.w3.org/TR/css-fonts-3/#font-variant-position-prop
    for keyword, tag in (('sub', 'subs'), ('super', 'sups')):
        if font_variant_position == keyword:
            result[tag] = 1
            break

    if font_variant_caps != 'normal':
        # TODO: the specification asks for additional checks
        # https://www.w3.org/TR/css-fonts-3/#font-variant-caps-prop
        result.update(dict.fromkeys(CAPS_KEYS[font_variant_caps], 1))

    if font_variant_numeric != 'normal':
        result.update(
            (NUMERIC_KEYS[name], 1) for name in font_variant_numeric)

    # TODO: support other values
    # See https://www.w3.org/TR/css-fonts-3/#font-variant-alternates-prop
    if font_variant_alternates == 'historical-forms':
        result['hist'] = 1

    if font_variant_east_asian != 'normal':
        result.update(
            (EAST_ASIAN_KEYS[name], 1) for name in font_variant_east_asian)

    # Step 5: incompatible non-OpenType features, already handled by Pango

    # Step 6: font-feature-settings

    if font_feature_settings != 'normal':
        for tag, value in dict(font_feature_settings).items():
            result[tag] = value

    return result
class Layout:
    """Object holding PangoLayout-related cdata pointers."""

    def __init__(self, context, font_size, style, justification_spacing=0,
                 max_width=None):
        """Create the Pango layout for the given style.

        :param context:
            Object providing ``font_config.font_map`` and a ``font_features``
            cache, or ``None`` to use a new PangoFT2 font map.
        :param font_size: size given to the Pango font description.
        :param style: style mapping read by :meth:`setup`.
        :param justification_spacing:
            Extra word spacing added by :meth:`set_text` when ``justify`` is
            true.
        :param max_width: stored as is, not used by this class.

        """
        self.justification_spacing = justification_spacing
        self.setup(context, font_size, style)
        self.max_width = max_width

    def setup(self, context, font_size, style):
        """Create the Pango objects and set attributes according to style."""
        self.context = context
        self.style = style
        self.first_line_direction = 0

        if context is None:
            font_map = ffi.gc(
                pangoft2.pango_ft2_font_map_new(), gobject.g_object_unref)
        else:
            font_map = context.font_config.font_map
        pango_context = ffi.gc(
            pango.pango_font_map_create_context(font_map),
            gobject.g_object_unref)
        pango.pango_context_set_round_glyph_positions(pango_context, False)
        self.layout = ffi.gc(
            pango.pango_layout_new(pango_context),
            gobject.g_object_unref)

        # font-language-override wins over the lang of the element
        lang_p, lang = None, None
        if style['font_language_override'] != 'normal':
            lang_p, lang = unicode_to_char_p(LST_TO_ISO.get(
                style['font_language_override'].lower(),
                style['font_language_override']))
        elif style['lang']:
            lang_p, lang = unicode_to_char_p(style['lang'])
        if lang:
            self.language = pango.pango_language_from_string(lang_p)
            pango.pango_context_set_language(pango_context, self.language)
        else:
            # No language, or a language whose encoded form is empty: always
            # define self.language, as it is read when getting font metrics.
            self.language = pango.pango_language_get_default()

        assert not isinstance(style['font_family'], str), (
            'font_family should be a list')
        self.font = ffi.gc(
            pango.pango_font_description_new(),
            pango.pango_font_description_free)
        family_p, family = unicode_to_char_p(','.join(style['font_family']))
        pango.pango_font_description_set_family(self.font, family_p)
        pango.pango_font_description_set_style(
            self.font, PANGO_STYLE[style['font_style']])
        pango.pango_font_description_set_stretch(
            self.font, PANGO_STRETCH[style['font_stretch']])
        pango.pango_font_description_set_weight(
            self.font, style['font_weight'])
        pango.pango_font_description_set_absolute_size(
            self.font, units_from_double(font_size))
        pango.pango_layout_set_font_description(self.layout, self.font)

        # Decoration metrics are only computed when a decoration is drawn
        text_decoration = style['text_decoration_line']
        if text_decoration != 'none':
            metrics = ffi.gc(
                pango.pango_context_get_metrics(
                    pango_context, self.font, self.language),
                pango.pango_font_metrics_unref)
            self.ascent = units_to_double(
                pango.pango_font_metrics_get_ascent(metrics))
            self.underline_position = units_to_double(
                pango.pango_font_metrics_get_underline_position(metrics))
            self.strikethrough_position = units_to_double(
                pango.pango_font_metrics_get_strikethrough_position(metrics))
            self.underline_thickness = units_to_double(
                pango.pango_font_metrics_get_underline_thickness(metrics))
            self.strikethrough_thickness = units_to_double(
                pango.pango_font_metrics_get_strikethrough_thickness(metrics))
        else:
            # Define the same attributes in both branches, so that reading
            # a metric never depends on the decoration of the style.
            self.ascent = None
            self.underline_position = None
            self.strikethrough_position = None
            self.underline_thickness = None
            self.strikethrough_thickness = None

        features = font_features(
            style['font_kerning'], style['font_variant_ligatures'],
            style['font_variant_position'], style['font_variant_caps'],
            style['font_variant_numeric'], style['font_variant_alternates'],
            style['font_variant_east_asian'], style['font_feature_settings'])
        if features and context:
            features = ','.join(
                f'{key} {value}' for key, value in features.items())

            # TODO: attributes should be freed.
            # In the meantime, keep a cache to avoid leaking too many of them.
            attr = context.font_features.get(features)
            if attr is None:
                try:
                    attr = pango.pango_attr_font_features_new(
                        features.encode('ascii'))
                except AttributeError:
                    LOGGER.error(
                        'OpenType features are not available '
                        'with Pango < 1.38')
                else:
                    context.font_features[features] = attr
            if attr is not None:
                attr_list = pango.pango_attr_list_new()
                pango.pango_attr_list_insert(attr_list, attr)
                pango.pango_layout_set_attributes(self.layout, attr_list)

    def get_first_line(self):
        """Return ``(first_line, index)``.

        ``index`` is the start index of the second line, or ``None`` if the
        layout has no second line.

        """
        first_line = pango.pango_layout_get_line_readonly(self.layout, 0)
        second_line = pango.pango_layout_get_line_readonly(self.layout, 1)
        if second_line != ffi.NULL:
            index = second_line.start_index
        else:
            index = None
        self.first_line_direction = first_line.resolved_dir
        return first_line, index

    def set_text(self, text, justify=False):
        """Set the layout text, and apply word, letter and tab spacing."""
        try:
            # Keep only the first line plus one character, we don't need more
            text = text[:text.index('\n') + 2]
        except ValueError:
            # End-of-line not found, keep the whole text
            pass
        text, bytestring = unicode_to_char_p(text)
        self.text = bytestring.decode('utf-8')
        pango.pango_layout_set_text(self.layout, text, -1)

        # Word spacing may not be set if we're trying to get word-spacing
        # computed value using a layout, for example if its unit is ex.
        word_spacing = self.style.get('word_spacing', 0)
        if justify:
            # Justification is needed when drawing text but is useless during
            # layout. Ignore it before layout is reactivated before the drawing
            # step.
            word_spacing += self.justification_spacing

        # Letter spacing may not be set if we're trying to get letter-spacing
        # computed value using a layout, for example if its unit is ex.
        letter_spacing = self.style.get('letter_spacing', 'normal')
        if letter_spacing == 'normal':
            letter_spacing = 0

        if text and (word_spacing != 0 or letter_spacing != 0):
            letter_spacing = units_from_double(letter_spacing)
            space_spacing = units_from_double(word_spacing) + letter_spacing
            attr_list = pango.pango_layout_get_attributes(self.layout)
            if not attr_list:
                # TODO: list should be freed
                attr_list = pango.pango_attr_list_new()

            def add_attr(start, end, spacing):
                # TODO: attributes should be freed
                attr = pango.pango_attr_letter_spacing_new(spacing)
                attr.start_index, attr.end_index = start, end
                pango.pango_attr_list_change(attr_list, attr)

            # Letter spacing on the whole text, then word + letter spacing
            # on each space character (indexes are UTF-8 byte offsets)
            add_attr(0, len(bytestring), letter_spacing)
            position = bytestring.find(b' ')
            while position != -1:
                add_attr(position, position + 1, space_spacing)
                position = bytestring.find(b' ', position + 1)

            pango.pango_layout_set_attributes(self.layout, attr_list)

        # Tabs width
        if b'\t' in bytestring:
            self.set_tabs()

    def set_tabs(self):
        """Set the tab width of the layout according to ``tab_size``."""
        if isinstance(self.style['tab_size'], int):
            # Integer tab size: use the width of that number of spaces,
            # measured with a temporary layout using the same style
            layout = Layout(
                self.context, self.style['font_size'], self.style,
                self.justification_spacing)
            layout.set_text(' ' * self.style['tab_size'])
            line, _ = layout.get_first_line()
            width, _ = line_size(line, self.style)
            width = int(round(width))
        else:
            width = int(self.style['tab_size'].value)
        # 0 is not handled correctly by Pango
        array = ffi.gc(
            pango.pango_tab_array_new_with_positions(
                1, True, pango.PANGO_TAB_LEFT, width or 1),
            pango.pango_tab_array_free)
        pango.pango_layout_set_tabs(self.layout, array)

    def deactivate(self):
        """Drop the references to the Pango objects and to the style."""
        del self.layout, self.font, self.language, self.style

    def reactivate(self, style):
        """Set up the layout again with ``style`` and the stored text."""
        self.setup(self.context, style['font_size'], style)
        self.set_text(self.text, justify=True)
+ + :param text: Unicode + :param style: a style dict of computed values + :param max_width: + The maximum available width in the same unit as ``style['font_size']``, + or ``None`` for unlimited width. + + """ + layout = Layout( + context, style['font_size'], style, justification_spacing, max_width) + + # Make sure that max_width * Pango.SCALE == max_width * 1024 fits in a + # signed integer. Treat bigger values same as None: unconstrained width. + text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line') + if max_width is not None and text_wrap and max_width < 2 ** 21: + pango.pango_layout_set_width( + layout.layout, units_from_double(max(0, max_width))) + + layout.set_text(text) + return layout + + +def split_first_line(text, style, context, max_width, justification_spacing, + minimum=False): + """Fit as much as possible in the available width for one line of text. + + Return ``(layout, length, resume_at, width, height, baseline)``. + + ``layout``: a pango Layout with the first line + ``length``: length in UTF-8 bytes of the first line + ``resume_at``: The number of UTF-8 bytes to skip for the next line. + May be ``None`` if the whole text fits in one line. + This may be greater than ``length`` in case of preserved + newline characters. 
+ ``width``: width in pixels of the first line + ``height``: height in pixels of the first line + ``baseline``: baseline in pixels of the first line + + """ + # See https://www.w3.org/TR/css-text-3/#white-space-property + text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line') + space_collapse = style['white_space'] in ('normal', 'nowrap', 'pre-line') + + original_max_width = max_width + if not text_wrap: + max_width = None + + # Step #1: Get a draft layout with the first line + layout = None + if (max_width is not None and max_width != float('inf') and + style['font_size']): + if max_width == 0: + # Trying to find minimum size, let's naively split on spaces and + # keep one word + one letter + space_index = text.find(' ') + if space_index == -1: + expected_length = len(text) + else: + expected_length = space_index + 2 # index + space + one letter + else: + expected_length = int(max_width / style['font_size'] * 2.5) + if expected_length < len(text): + # Try to use a small amount of text instead of the whole text + layout = create_layout( + text[:expected_length], style, context, max_width, + justification_spacing) + first_line, index = layout.get_first_line() + if index is None: + # The small amount of text fits in one line, give up and use + # the whole text + layout = None + if layout is None: + layout = create_layout( + text, style, context, original_max_width, justification_spacing) + first_line, index = layout.get_first_line() + resume_at = index + + # Step #2: Don't split lines when it's not needed + if max_width is None: + # The first line can take all the place needed + return first_line_metrics( + first_line, text, layout, resume_at, space_collapse, style) + first_line_width, _ = line_size(first_line, style) + if index is None and first_line_width <= max_width: + # The first line fits in the available width + return first_line_metrics( + first_line, text, layout, resume_at, space_collapse, style) + + # Step #3: Try to put the first word of 
the second line on the first line + # https://mail.gnome.org/archives/gtk-i18n-list/2013-September/msg00006 + # is a good thread related to this problem. + first_line_text = text.encode('utf-8')[:index].decode('utf-8') + # We can’t rely on first_line_width, see + # https://github.com/Kozea/WeasyPrint/issues/1051 + first_line_fits = ( + first_line_width <= max_width or + ' ' in first_line_text.strip() or + can_break_text(first_line_text.strip(), style['lang'])) + if first_line_fits: + # The first line fits but may have been cut too early by Pango + second_line_text = text.encode('utf-8')[index:].decode('utf-8') + else: + # The line can't be split earlier, try to hyphenate the first word. + first_line_text = '' + second_line_text = text + + next_word = second_line_text.split(' ', 1)[0] + if next_word: + if space_collapse: + # next_word might fit without a space afterwards + # only try when space collapsing is allowed + new_first_line_text = first_line_text + next_word + layout.set_text(new_first_line_text) + first_line, index = layout.get_first_line() + first_line_width, _ = line_size(first_line, style) + if index is None and first_line_text: + # The next word fits in the first line, keep the layout + resume_at = len(new_first_line_text.encode('utf-8')) + 1 + return first_line_metrics( + first_line, text, layout, resume_at, space_collapse, style) + elif index: + # Text may have been split elsewhere by Pango earlier + resume_at = index + else: + # Second line is none + resume_at = first_line.length + 1 + if resume_at >= len(text.encode('utf-8')): + resume_at = None + elif first_line_text: + # We found something on the first line but we did not find a word on + # the next line, no need to hyphenate, we can keep the current layout + return first_line_metrics( + first_line, text, layout, resume_at, space_collapse, style) + + # Step #4: Try to hyphenate + hyphens = style['hyphens'] + lang = style['lang'] and pyphen.language_fallback(style['lang']) + total, left, right = 
style['hyphenate_limit_chars'] + hyphenated = False + soft_hyphen = '\u00ad' + + try_hyphenate = False + if hyphens != 'none': + next_word_boundaries = get_next_word_boundaries(second_line_text, lang) + if next_word_boundaries: + # We have a word to hyphenate + start_word, stop_word = next_word_boundaries + next_word = second_line_text[start_word:stop_word] + if stop_word - start_word >= total: + # This word is long enough + first_line_width, _ = line_size(first_line, style) + space = max_width - first_line_width + if style['hyphenate_limit_zone'].unit == '%': + limit_zone = ( + max_width * style['hyphenate_limit_zone'].value / 100.) + else: + limit_zone = style['hyphenate_limit_zone'].value + if space > limit_zone or space < 0: + # Available space is worth the try, or the line is even too + # long to fit: try to hyphenate + try_hyphenate = True + + if try_hyphenate: + # Automatic hyphenation possible and next word is long enough + auto_hyphenation = hyphens == 'auto' and lang + manual_hyphenation = False + if auto_hyphenation: + if soft_hyphen in first_line_text or soft_hyphen in next_word: + # Automatic hyphenation opportunities within a word must be + # ignored if the word contains a conditional hyphen, in favor + # of the conditional hyphen(s). 
+ # See https://drafts.csswg.org/css-text-3/#valdef-hyphens-auto + manual_hyphenation = True + else: + manual_hyphenation = hyphens == 'manual' + + if manual_hyphenation: + # Manual hyphenation: check that the line ends with a soft + # hyphen and add the missing hyphen + if first_line_text.endswith(soft_hyphen): + # The first line has been split on a soft hyphen + if ' ' in first_line_text: + first_line_text, next_word = ( + first_line_text.rsplit(' ', 1)) + next_word = f' {next_word}' + layout.set_text(first_line_text) + first_line, index = layout.get_first_line() + resume_at = len((first_line_text + ' ').encode('utf8')) + else: + first_line_text, next_word = '', first_line_text + soft_hyphen_indexes = [ + match.start() for match in re.finditer(soft_hyphen, next_word)] + soft_hyphen_indexes.reverse() + dictionary_iterations = [ + next_word[:i + 1] for i in soft_hyphen_indexes] + elif auto_hyphenation: + dictionary_key = (lang, left, right, total) + dictionary = context.dictionaries.get(dictionary_key) + if dictionary is None: + dictionary = pyphen.Pyphen(lang=lang, left=left, right=right) + context.dictionaries[dictionary_key] = dictionary + dictionary_iterations = [ + start for start, end in dictionary.iterate(next_word)] + else: + dictionary_iterations = [] + + if dictionary_iterations: + for first_word_part in dictionary_iterations: + new_first_line_text = ( + first_line_text + + second_line_text[:start_word] + + first_word_part) + hyphenated_first_line_text = ( + new_first_line_text + style['hyphenate_character']) + new_layout = create_layout( + hyphenated_first_line_text, style, context, max_width, + justification_spacing) + new_first_line, new_index = new_layout.get_first_line() + new_first_line_width, _ = line_size(new_first_line, style) + new_space = max_width - new_first_line_width + if new_index is None and ( + new_space >= 0 or + first_word_part == dictionary_iterations[-1]): + hyphenated = True + layout = new_layout + first_line = new_first_line + 
index = new_index + resume_at = len(new_first_line_text.encode('utf8')) + if text[len(new_first_line_text)] == soft_hyphen: + # Recreate the layout with no max_width to be sure that + # we don't break before the soft hyphen + pango.pango_layout_set_width( + layout.layout, units_from_double(-1)) + resume_at += len(soft_hyphen.encode('utf8')) + break + + if not hyphenated and not first_line_text: + # Recreate the layout with no max_width to be sure that + # we don't break before or inside the hyphenate character + hyphenated = True + layout.set_text(hyphenated_first_line_text) + pango.pango_layout_set_width( + layout.layout, units_from_double(-1)) + first_line, index = layout.get_first_line() + resume_at = len(new_first_line_text.encode('utf8')) + if text[len(first_line_text)] == soft_hyphen: + resume_at += len(soft_hyphen.encode('utf8')) + + if not hyphenated and first_line_text.endswith(soft_hyphen): + # Recreate the layout with no max_width to be sure that + # we don't break inside the hyphenate-character string + hyphenated = True + hyphenated_first_line_text = ( + first_line_text + style['hyphenate_character']) + layout.set_text(hyphenated_first_line_text) + pango.pango_layout_set_width( + layout.layout, units_from_double(-1)) + first_line, index = layout.get_first_line() + resume_at = len(first_line_text.encode('utf8')) + + # Step 5: Try to break word if it's too long for the line + overflow_wrap = style['overflow_wrap'] + first_line_width, _ = line_size(first_line, style) + space = max_width - first_line_width + # If we can break words and the first line is too long + if not minimum and overflow_wrap == 'break-word' and space < 0: + # Is it really OK to remove hyphenation for word-break ? 
+ hyphenated = False + # TODO: Modify code to preserve W3C condition: + # "Shaping characters are still shaped as if the word were not broken" + # The way new lines are processed in this function (one by one with no + # memory of the last) prevents shaping characters (arabic, for + # instance) from keeping their shape when wrapped on the next line with + # pango layout. Maybe insert Unicode shaping characters in text? + layout.set_text(text) + pango.pango_layout_set_width( + layout.layout, units_from_double(max_width)) + pango.pango_layout_set_wrap( + layout.layout, PANGO_WRAP_MODE['WRAP_CHAR']) + first_line, index = layout.get_first_line() + resume_at = index or first_line.length + if resume_at >= len(text.encode('utf-8')): + resume_at = None + + return first_line_metrics( + first_line, text, layout, resume_at, space_collapse, style, hyphenated, + style['hyphenate_character']) + + +def get_log_attrs(text, lang): + if lang: + lang_p, lang = unicode_to_char_p(lang) + else: + lang = None + language = pango.pango_language_get_default() + if lang: + language = pango.pango_language_from_string(lang_p) + # TODO: this should be removed when bidi is supported + for char in ('\u202a', '\u202b', '\u202c', '\u202d', '\u202e'): + text = text.replace(char, '') + text_p, bytestring = unicode_to_char_p(text) + length = len(text) + 1 + log_attrs = ffi.new('PangoLogAttr[]', length) + pango.pango_get_log_attrs( + text_p, len(bytestring), -1, language, log_attrs, length) + return bytestring, log_attrs + + +def can_break_text(text, lang): + if not text or len(text) < 2: + return None + bytestring, log_attrs = get_log_attrs(text, lang) + length = len(text) + 1 + return any(attr.is_line_break for attr in log_attrs[1:length - 1]) + + +def get_next_word_boundaries(text, lang): + if not text or len(text) < 2: + return None + bytestring, log_attrs = get_log_attrs(text, lang) + for i, attr in enumerate(log_attrs): + if attr.is_word_end: + word_end = i + break + if attr.is_word_boundary: + 
word_start = i + else: + return None + return word_start, word_end + + +def get_last_word_end(text, lang): + if not text or len(text) < 2: + return None + bytestring, log_attrs = get_log_attrs(text, lang) + for i, attr in enumerate(list(log_attrs)[::-1]): + if i and attr.is_word_end: + return len(text) - i diff --git a/weasyprint/urls.py b/weasyprint/urls.py index ec4e8cf8..0295faf5 100644 --- a/weasyprint/urls.py +++ b/weasyprint/urls.py @@ -95,9 +95,9 @@ def path2url(path): if path.startswith('///'): # On Windows pathname2url(r'C:\foo') is apparently '///C:/foo' # That enough slashes already. - return 'file:' + path + return f'file:{path}' else: - return 'file://' + path + return f'file://{path}' def url_is_absolute(url): @@ -135,7 +135,7 @@ def url_join(base_url, url, allow_relative, context, context_args): return iri_to_uri(url) else: LOGGER.error( - 'Relative URI reference without a base URI: ' + context, + f'Relative URI reference without a base URI: {context}', *context_args) return None @@ -175,15 +175,12 @@ def default_url_fetcher(url, timeout=10, ssl_context=None): """Fetch an external resource such as an image or stylesheet. Another callable with the same signature can be given as the - :obj:`url_fetcher` argument to :class:`HTML` or :class:`CSS`. - (See :ref:`url-fetchers`.) + ``url_fetcher`` argument to :class:`HTML` or :class:`CSS`. + (See :ref:`URL Fetchers`.) - :type url: str - :param url: The URL of the resource to fetch. - :type timeout: int - :param timeout: The number of seconds before HTTP requests are dropped. - :type ssl_context: ssl.SSLContext - :param ssl_context: An SSL context used for HTTP requests. + :param str url: The URL of the resource to fetch. + :param int timeout: The number of seconds before HTTP requests are dropped. + :param ssl.SSLContext ssl_context: An SSL context used for HTTP requests. :raises: An exception indicating failure, e.g. :obj:`ValueError` on syntactically invalid URL. 
:returns: A :obj:`dict` with the following keys: