WeasyPrint/weasyprint/__init__.py

# coding: utf-8
"""
    WeasyPrint
    ==========

    WeasyPrint converts web documents to PDF.

    The public API is what is accessible from this "root" package
    without importing sub-modules.

    :copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""

from __future__ import division, unicode_literals

import contextlib  # noqa
import html5lib  # noqa


# The version is defined once here; ``__version__`` is an alias of it.
VERSION = '0.25'
__version__ = VERSION

# Used for 'User-Agent' in HTTP and 'Creator' in PDF
VERSION_STRING = 'WeasyPrint %s (http://weasyprint.org/)' % VERSION

# Public names of the package. ``Document`` and ``Page`` are imported at the
# end of this module (after the classes) to work around circular imports.
__all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page',
           'default_url_fetcher', 'VERSION']


# Import after setting the version, as the version is used in other modules
from .urls import (fetch, default_url_fetcher, path2url, ensure_url,
                   url_is_absolute)  # noqa
from .compat import unicode  # noqa
from .logger import LOGGER  # noqa
# Some imports are at the end of the file (after the CSS class)
# to work around circular imports.


class HTML(object):
    """Represents an HTML document as an `lxml <http://lxml.de/>`_ tree.

    Unless an already parsed tree is given, the source is parsed with
    html5lib using its lxml tree builder.

    You can just create an instance with a positional argument:
    ``doc = HTML(something)``
    The class will try to guess if the input is a filename, an absolute URL,
    or a file-like object.

    Alternatively, use **one** named argument so that no guessing is involved:

    :param filename: A filename, relative to the current directory or absolute.
    :param url: An absolute, fully qualified URL.
    :param file_obj: a file-like: any object with a :meth:`~file.read` method.
    :param string: a string of HTML source. (This argument must be named.)
    :param tree: a parsed lxml tree. (This argument must be named.)

    Specifying multiple inputs is an error: ``HTML(filename=foo, url=bar)``
    will raise.

    You can also pass optional named arguments:

    :param encoding: Force the source character encoding.
    :param base_url: The base used to resolve relative URLs
        (eg. in ``<img src="../foo.png">``). If not provided, try to use
        the input filename, URL, or ``name`` attribute of file-like objects.
        **Note:** In some cases like ``HTML(string=foo)`` relative URLs
        will be invalid if ``base_url`` is not provided.
    :param url_fetcher: a function or other callable
        with the same signature as :func:`default_url_fetcher` called to
        fetch external resources such as stylesheets and images.
        (See :ref:`url-fetchers`.)
    :param media_type: The media type to use for ``@media``.
        Defaults to ``'print'``.

    """
    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                 string=None, tree=None, encoding=None, base_url=None,
                 url_fetcher=default_url_fetcher, media_type='print'):
        selected = _select_source(
            guess, filename, url, file_obj, string, tree, base_url,
            url_fetcher)
        # The source (open file, HTTP response...) is only valid inside the
        # ``with`` block, so all parsing happens before leaving it.
        with selected as (source_type, source, base_url, protocol_encoding):
            if source_type == 'tree':
                # Already parsed by the caller: use it as-is.
                document = source
            else:
                # An explicit ``encoding`` wins over the one announced by
                # the transport protocol.
                if not encoding:
                    encoding = protocol_encoding
                # Text input is already decoded: no encoding applies.
                if isinstance(source, unicode):
                    encoding = None
                document = html5lib.parse(
                    source, treebuilder='lxml', encoding=encoding,
                    namespaceHTMLElements=False)
                # Sanity check on the parser output.
                assert document
        base_url = find_base_url(document, base_url)
        # ``document`` is either a whole tree (it has ``getroot``) or a
        # single element; record the base URL on the tree in both cases and
        # keep the root element.
        if hasattr(document, 'getroot'):
            document.docinfo.URL = base_url
            root_element = document.getroot()
        else:
            document.getroottree().docinfo.URL = base_url
            root_element = document
        self.root_element = root_element
        self.base_url = base_url
        self.url_fetcher = url_fetcher
        self.media_type = media_type

    def _ua_stylesheets(self):
        """Return the list of user-agent stylesheets for this document."""
        return [HTML5_UA_STYLESHEET]

    def _get_metadata(self):
        """Return the metadata extracted from the document root element."""
        return get_html_metadata(self.root_element)

    def render(self, stylesheets=None, enable_hinting=False):
        """Lay out and paginate the document, but do not (yet) export it
        to PDF or another format.

        This returns a :class:`~document.Document` object which provides
        access to individual pages and various meta-data.
        See :meth:`write_pdf` to get a PDF directly.

        .. versionadded:: 0.15

        :param stylesheets:
            An optional list of user stylesheets. (See
            :ref:`stylesheet-origins`\\.) List elements are :class:`CSS`
            objects, filenames, URLs, or file-like objects.
        :type enable_hinting: bool
        :param enable_hinting:
            Whether text, borders and background should be *hinted* to fall
            at device pixel boundaries. Should be enabled for pixel-based
            output (like PNG) but not vector based output (like PDF).
        :returns: A :class:`~document.Document` object.

        """
        return Document._render(self, stylesheets, enable_hinting)

    def write_pdf(self, target=None, stylesheets=None, zoom=1,
                  attachments=None):
        """Render the document to a PDF file.

        This is a shortcut for calling :meth:`render`, then
        :meth:`Document.write_pdf() <document.Document.write_pdf>`.

        :param target:
            A filename, file-like object, or :obj:`None`.
        :param stylesheets:
            An optional list of user stylesheets. (See
            :ref:`stylesheet-origins`\\.) The list’s elements are
            :class:`CSS` objects, filenames, URLs, or file-like objects.
        :type zoom: float
        :param zoom:
            The zoom factor in PDF units per CSS units.
            **Warning**: All CSS units (even physical, like ``cm``)
            are affected.
            For values other than 1, physical CSS units will thus be “wrong”.
            Page size declarations are affected too, even with keyword values
            like ``@page { size: A3 landscape; }``
        :param attachments: A list of additional file attachments for the
            generated PDF document or :obj:`None`. The list's elements are
            :class:`Attachment` objects, filenames, URLs or file-like objects.
        :returns:
            The PDF as byte string if :obj:`target` is not provided or
            :obj:`None`, otherwise :obj:`None` (the PDF is written to
            :obj:`target`.)

        """
        document = self.render(stylesheets)
        return document.write_pdf(target, zoom, attachments)

    def write_image_surface(self, stylesheets=None, resolution=96):
        """Render the document with hinting enabled to an image surface.

        This is a shortcut for calling :meth:`render` with
        ``enable_hinting=True``, then ``write_image_surface(resolution)``
        on the resulting document.

        :param stylesheets:
            An optional list of user stylesheets, passed to :meth:`render`.
        :param resolution:
            The output resolution, passed on unchanged. Defaults to 96.
        :returns:
            The surface, i.e. the first of the three values returned by
            the document; the width and height are discarded.

        """
        document = self.render(stylesheets, enable_hinting=True)
        surface, _width, _height = document.write_image_surface(resolution)
        return surface

    def write_png(self, target=None, stylesheets=None, resolution=96):
        """Paint the pages vertically to a single PNG image.

        There is no decoration around pages other than those specified in CSS
        with ``@page`` rules. The final image is as wide as the widest page.
        Each page is below the previous one, centered horizontally.

        This is a shortcut for calling :meth:`render`, then
        :meth:`Document.write_png() <document.Document.write_png>`.

        :param target:
            A filename, file-like object, or :obj:`None`.
        :param stylesheets:
            An optional list of user stylesheets. (See
            :ref:`stylesheet-origins`\\.) The list’s elements are
            :class:`CSS` objects, filenames, URLs, or file-like objects.
        :type resolution: float
        :param resolution:
            The output resolution in PNG pixels per CSS inch. At 96 dpi
            (the default), PNG pixels match the CSS ``px`` unit.
        :returns:
            The image as byte string if :obj:`target` is not provided or
            :obj:`None`, otherwise :obj:`None` (the image is written to
            :obj:`target`.)

        """
        document = self.render(stylesheets, enable_hinting=True)
        png_bytes, _width, _height = document.write_png(target, resolution)
        return png_bytes


class CSS(object):
    """A CSS stylesheet, parsed by tinycss.

    Instances are built like :class:`HTML` ones and take the same
    parameters, except for ``tree`` which is not available.

    ``CSS`` objects have no public attribute or method. They are only meant to
    be given to the :meth:`~HTML.write_pdf`, :meth:`~HTML.write_png` and
    :meth:`~HTML.render` methods of :class:`HTML` objects.

    """
    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                 string=None, encoding=None, base_url=None,
                 url_fetcher=default_url_fetcher, _check_mime_type=False,
                 media_type='print'):
        selected = _select_source(
            guess, filename, url, file_obj, string, tree=None,
            base_url=base_url, url_fetcher=url_fetcher,
            check_css_mime_type=_check_mime_type)
        with selected as (kind, content, base_url, protocol_encoding):
            is_text = kind == 'string' and not isinstance(content, bytes)
            if is_text:
                # Already decoded text: no encoding detection is needed.
                parsed = PARSER.parse_stylesheet(content)
            else:
                # Bytes, either given directly or read from the file object
                # while the source is still open.
                raw = content.read() if kind == 'file_obj' else content
                parsed = PARSER.parse_stylesheet_bytes(
                    raw, linking_encoding=encoding,
                    protocol_encoding=protocol_encoding)
        self.base_url = base_url
        self.rules = list(preprocess_stylesheet(
            media_type, base_url, parsed.rules, url_fetcher))
        # TODO: do not keep this self.stylesheet around?
        self.stylesheet = parsed
        # Report parse errors as warnings.
        for error in parsed.errors:
            LOGGER.warning(error)


class Attachment(object):
    """A file to attach to a PDF document.

    Instances are built like :class:`HTML` ones, minus the HTML specific
    parameters. The source is not read here: ``source`` holds the context
    manager returned by ``_select_source``, which has not been entered yet.

    :param description: An optional description of the attachment to be
        included in the PDF document. May be :obj:`None`

    """
    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                 string=None, base_url=None, url_fetcher=default_url_fetcher,
                 description=None):
        source = _select_source(
            guess=guess, filename=filename, url=url, file_obj=file_obj,
            string=string, tree=None, base_url=base_url,
            url_fetcher=url_fetcher)
        self.source = source
        self.description = description


@contextlib.contextmanager
def _select_source(guess=None, filename=None, url=None, file_obj=None,
                   string=None, tree=None, base_url=None,
                   url_fetcher=default_url_fetcher, check_css_mime_type=False):
    """Select the single provided input and yield it in normalized form.

    Exactly one of ``guess``, ``filename``, ``url``, ``file_obj``,
    ``string`` and ``tree`` must be not ``None``. This is a context manager:
    files opened and URLs fetched here are only usable inside the ``with``
    block.

    :param guess: a file-like object (anything with a ``read`` attribute),
        an absolute URL or a filename; the kind is detected in that order.
    :param base_url: normalized with ``ensure_url`` when given. Otherwise
        derived from the filename, the (redirected) URL or the ``name``
        attribute of the file object, when available.
    :param url_fetcher: the callable handed to ``fetch`` for URL sources.
    :param check_css_mime_type: when true, a fetched URL whose MIME type is
        not ``text/css`` is logged and replaced by an empty string source.
    :returns: yields a ``(source_type, source, base_url, protocol_encoding)``
        tuple where ``source_type`` is ``'file_obj'``, ``'string'`` or
        ``'tree'``.
    :raises TypeError: if zero or several sources are given. As the body
        is a generator, this happens when the context manager is entered,
        not when this function is called.

    """
    if base_url is not None:
        base_url = ensure_url(base_url)

    candidates = (
        ('guess', guess), ('filename', filename), ('url', url),
        ('file_obj', file_obj), ('string', string), ('tree', tree))
    provided = [label for label, value in candidates if value is not None]
    if len(provided) != 1:
        raise TypeError('Expected exactly one source, got ' + (
            ', '.join(provided) or 'nothing'))
    source_kind = provided[0]

    if source_kind == 'guess':
        if hasattr(guess, 'read'):
            type_ = 'file_obj'
        elif url_is_absolute(guess):
            type_ = 'url'
        else:
            type_ = 'filename'
        # Delegate to the branch matching the guessed kind.
        nested = _select_source(
            base_url=base_url, url_fetcher=url_fetcher,
            check_css_mime_type=check_css_mime_type,
            # Use str() to work around http://bugs.python.org/issue4978
            # See https://github.com/Kozea/WeasyPrint/issues/97
            **{str(type_): guess})
        with nested as selected:
            yield selected
    elif source_kind == 'filename':
        if base_url is None:
            base_url = path2url(filename)
        # The file is closed when the caller leaves the ``with`` block.
        with open(filename, 'rb') as file_obj:
            yield 'file_obj', file_obj, base_url, None
    elif source_kind == 'url':
        with fetch(url_fetcher, url) as result:
            if check_css_mime_type and result['mime_type'] != 'text/css':
                LOGGER.warning(
                    'Unsupported stylesheet type %s for %s',
                    result['mime_type'], result['redirected_url'])
                # Ignore the content: behave as for an empty stylesheet.
                yield 'string', '', base_url, None
            else:
                proto_encoding = result.get('encoding')
                if base_url is None:
                    base_url = result.get('redirected_url', url)
                if 'string' in result:
                    yield 'string', result['string'], base_url, proto_encoding
                else:
                    yield (
                        'file_obj', result['file_obj'], base_url,
                        proto_encoding)
    elif source_kind == 'file_obj':
        if base_url is None:
            # filesystem file-like objects have a 'name' attribute.
            name = getattr(file_obj, 'name', None)
            # The name is not always a string: files wrapping a raw file
            # descriptor expose the integer descriptor instead, which has
            # no ``startswith`` and is not a usable base URL.
            # Some streams have a .name like '<stdin>', not a filename.
            if (isinstance(name, (str, unicode)) and name and
                    not name.startswith('<')):
                base_url = ensure_url(name)
        yield 'file_obj', file_obj, base_url, None
    elif source_kind == 'string':
        yield 'string', string, base_url, None
    else:
        # Only 'tree' is left.
        yield 'tree', tree, base_url, None

# Work around circular imports.
from .css import PARSER, preprocess_stylesheet  # noqa
from .html import find_base_url, HTML5_UA_STYLESHEET, get_html_metadata  # noqa
from .document import Document, Page  # noqa
-												Replace utf8 with utf-8 for gettext compatibility

											
										
										
											2015-11-25 10:38:01 +03:00
+								# coding: utf-8
-												Switch the licence to BSD and rewrite module docstrings/headers

											
										
										
											2012-03-22 02:19:27 +04:00
+								"""
 								    WeasyPrint
 								    ==========
-												Add links to the website and an AGPL licence.

											
										
										
											2011-04-28 21:15:30 +04:00
-												Switch the licence to BSD and rewrite module docstrings/headers

											
										
										
											2012-03-22 02:19:27 +04:00
+								    WeasyPrint converts web documents to PDF.
-												Add a weasyprint.py script.

											
										
										
											2011-08-09 14:45:51 +04:00
-												Switch the licence to BSD and rewrite module docstrings/headers

											
										
										
											2012-03-22 02:19:27 +04:00
+								    The public API is what is accessible from this "root" packages
 								    without importing sub-modules.
-												Clean weasy/__init__

											
										
										
											2011-08-19 13:22:31 +04:00
-.

											
										
										
											2014-01-10 18:27:02 +04:00
+								    :copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
-												Switch the licence to BSD and rewrite module docstrings/headers

											
										
										
											2012-03-22 02:19:27 +04:00
+								    :license: BSD, see LICENSE for details.
-												Clean weasy/__init__

											
										
										
											2011-08-19 13:22:31 +04:00
 								"""
-												Python 3 compat. All tests pass with the same code base!

											
										
										
											2012-02-17 21:49:58 +04:00
+								from __future__ import division, unicode_literals
-												Fix many PEP8 errors and warnings

- Put line breaks after operators (that's OK)
- Don't assign lambdas, use functions (well, that's really explicit in
  PEP8, it's really verbose but why not)
- Put imports at the beginning of the file (only special cases for us)

											
										
										
											2016-01-15 14:47:03 +03:00
+								import contextlib  # noqa
 								import html5lib  # noqa
-												Python 3 compat. All tests pass with the same code base!

											
										
										
											2012-02-17 21:49:58 +04:00
-												Version 0.25

											
										
										
											2015-12-17 14:01:34 +03:00
+								VERSION = '0.25'
-												De-duplicate the version number.

											
										
										
											2012-02-07 19:11:38 +04:00
+								__version__ = VERSION
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												Have the PDF post-process run on Python3 (links are broken)

											
										
										
											2012-05-15 15:40:36 +04:00
+								# Used for 'User-Agent' in HTTP and 'Creator' in PDF
 								VERSION_STRING = 'WeasyPrint %s (http://weasyprint.org/)' % VERSION
-												Fix code formatting

											
										
										
											2014-04-27 21:16:14 +04:00
+								__all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page',
 								           'default_url_fetcher', 'VERSION']
-												Have the PDF post-process run on Python3 (links are broken)

											
										
										
											2012-05-15 15:40:36 +04:00
-												Add ``url_fetcher`` to the public API

											
										
										
											2012-07-13 14:24:55 +04:00
-												Fix many PEP8 errors and warnings

- Put line breaks after operators (that's OK)
- Don't assign lambdas, use functions (well, that's really explicit in
  PEP8, it's really verbose but why not)
- Put imports at the beginning of the file (only special cases for us)

											
										
										
											2016-01-15 14:47:03 +03:00
+								# Import after setting the version, as the version is used in other modules
-												URL fetching: use an intermediate function rather than a wrapper

											
										
										
											2013-06-20 15:17:03 +04:00
+								from .urls import (fetch, default_url_fetcher, path2url, ensure_url,
-												Fix many PEP8 errors and warnings

- Put line breaks after operators (that's OK)
- Don't assign lambdas, use functions (well, that's really explicit in
  PEP8, it's really verbose but why not)
- Put imports at the beginning of the file (only special cases for us)

											
										
										
											2016-01-15 14:47:03 +03:00
+								                   url_is_absolute)  # noqa
 								from .compat import unicode  # noqa
 								from .logger import LOGGER  # noqa
 								# Some imports are at the end of the file (after the CSS class)
-												Import stuff early, do not try to run --help without importing.

											
										
										
											2013-02-25 18:21:25 +04:00
+								# to work around circular imports.
-												Add ``url_fetcher`` to the public API

											
										
										
											2012-07-13 14:24:55 +04:00
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								class HTML(object):
 								    """Represents an HTML document parsed by `lxml <http://lxml.de/>`_.
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    You can just create an instance with a positional argument:
 								    ``doc = HTML(something)``
 								    The class will try to guess if the input is a filename, an absolute URL,
 								    or a file-like object.
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    Alternatively, use **one** named argument so that no guessing is involved:
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    :param filename: A filename, relative to the current directory or absolute.
 								    :param url: An absolute, fully qualified URL.
 								    :param file_obj: a file-like: any object with a :meth:`~file.read` method.
 								    :param string: a string of HTML source. (This argument must be named.)
 								    :param tree: a parsed lxml tree. (This argument must be named.)
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
 								    Specifying multiple inputs is an error: ``HTML(filename=foo, url=bar)``
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    will raise.
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    You can also pass optional named arguments:
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    :param encoding: Force the source character encoding.
 								    :param base_url: The base used to resolve relative URLs
 								        (eg. in ``<img src="../foo.png">``). If not provided, try to use
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        the input filename, URL, or ``name`` attribute of file-like objects.
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								    :param url_fetcher: a function or other callable
 								        with the same signature as :func:`default_url_fetcher` called to
 								        fetch external resources such as stylesheets and images.
 								        (See :ref:`url-fetchers`.)
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								    :param media_type: The media type to use for ``@media``.
 								        Defaults to ``'print'``. **Note:** In some cases like
 								        ``HTML(string=foo)`` relative URLs will be invalid if ``base_url``
 								        is not provided.
-												Allow a file object as a positional argument in the API.

											
										
										
											2012-02-27 18:07:41 +04:00
 								    """
 								    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
-												Add ``url_fetcher`` to the public API

											
										
										
											2012-07-13 14:24:55 +04:00
+								                 string=None, tree=None, encoding=None, base_url=None,
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								                 url_fetcher=default_url_fetcher, media_type='print'):
-												URL fetching: enforce closing sockets/files with a context manager.

											
										
										
											2013-06-20 15:58:24 +04:00
+								        result = _select_source(
-												Add ``url_fetcher`` to the public API

											
										
										
											2012-07-13 14:24:55 +04:00
+								            guess, filename, url, file_obj, string, tree, base_url,
 								            url_fetcher)
-												URL fetching: enforce closing sockets/files with a context manager.

											
										
										
											2013-06-20 15:58:24 +04:00
+								        with result as (source_type, source, base_url, protocol_encoding):
 								            if source_type == 'tree':
 								                result = source
-												Also accept a parsed lxml tree for the HTML class.

											
										
										
											2012-05-24 18:47:40 +04:00
+								            else:
-												URL fetching: enforce closing sockets/files with a context manager.

											
										
										
											2013-06-20 15:58:24 +04:00
+								                if not encoding:
 								                    encoding = protocol_encoding
-												Switch to html5lib to parse HTML. Fix #12.

											
										
										
											2013-07-22 03:10:25 +04:00
+								                if isinstance(source, unicode):
 								                    encoding = None
 								                result = html5lib.parse(
 								                    source, treebuilder='lxml', encoding=encoding,
 								                    namespaceHTMLElements=False)
 								                assert result
-												Add support for <base href="..."> element in HTML.

											
										
										
											2012-08-03 17:20:22 +04:00
+								        base_url = find_base_url(result, base_url)
-												Also accept a parsed lxml tree for the HTML class.

											
										
										
											2012-05-24 18:47:40 +04:00
+								        if hasattr(result, 'getroot'):
 								            result.docinfo.URL = base_url
-												Nicer message on HTML parse error

											
										
										
											2012-02-16 21:27:30 +04:00
+								            result = result.getroot()
-												Also accept a parsed lxml tree for the HTML class.

											
										
										
											2012-05-24 18:47:40 +04:00
+								        else:
 								            result.getroottree().docinfo.URL = base_url
-												Nicer message on HTML parse error

											
										
										
											2012-02-16 21:27:30 +04:00
+								        self.root_element = result
-												Fix #772: Use redirected URLs as base URLs

											
										
										
											2012-06-15 00:08:34 +04:00
+								        self.base_url = base_url
-												Add ``url_fetcher`` to the public API

											
										
										
											2012-07-13 14:24:55 +04:00
+								        self.url_fetcher = url_fetcher
-												s/medium/media_type/ s/user_agent_stylesheets/ua_stylesheets/

											
										
										
											2012-08-02 15:04:31 +04:00
+								        self.media_type = media_type
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												Get rid of the Document class (finally!)

											
										
										
											2012-09-25 18:01:47 +04:00
+								    def _ua_stylesheets(self):
-												Use the testing UA stylesheet in test_api.py

											
										
										
											2012-02-16 21:52:36 +04:00
+								        return [HTML5_UA_STYLESHEET]
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								    def _get_metadata(self):
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								        return get_html_metadata(self.root_element)
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
-												Move resolution to *.write_png only.

											
										
										
											2012-10-05 22:12:05 +04:00
+								    def render(self, stylesheets=None, enable_hinting=False):
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        """Lay out and paginate the document, but do not (yet) export it
 								        to PDF or another format.
-												Add a low-level public API

											
										
										
											2012-09-12 21:33:16 +04:00
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								        This returns a :class:`~document.Document` object which provides
 								        access to individual pages and various meta-data.
 								        See :meth:`write_pdf` to get a PDF directly.
 								        .. versionadded:: 0.15
-												Add a low-level public API

											
										
										
											2012-09-12 21:33:16 +04:00
-												Get rid of the Document class (finally!)

											
										
										
											2012-09-25 18:01:47 +04:00
+								        :param stylesheets:
 								            An optional list of user stylesheets. (See
 								            :ref:`stylesheet-origins`\.) List elements are :class:`CSS`
 								            objects, filenames, URLs, or file-like objects.
-												Moar docs for the low-level API.

											
										
										
											2012-09-20 19:21:44 +04:00
+								        :type enable_hinting: bool
 								        :param enable_hinting:
 								            Whether text, borders and background should be *hinted* to fall
 								            at device pixel boundaries. Should be enabled for pixel-based
 								            output (like PNG) but not vector based output (like PDF).
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								        :returns: A :class:`~document.Document` object.
-												Moar docs for the low-level API.

											
										
										
											2012-09-20 19:21:44 +04:00
-												Add a low-level public API

											
										
										
											2012-09-12 21:33:16 +04:00
+								        """
-												Move resolution to *.write_png only.

											
										
										
											2012-10-05 22:12:05 +04:00
+								        return Document._render(self, stylesheets, enable_hinting)
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								    def write_pdf(self, target=None, stylesheets=None, zoom=1,
-												Fix code formatting

											
										
										
											2014-04-27 21:16:14 +04:00
+								                  attachments=None):
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								        """Render the document to a PDF file.
 								        This is a shortcut for calling :meth:`render`, then
 								        :meth:`Document.write_pdf() <document.Document.write_pdf>`.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
 								        :param target:
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								            A filename, file-like object, or :obj:`None`.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        :param stylesheets:
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								            An optional list of user stylesheets. (See
 								            :ref:`stylesheet-origins`\.) The list’s elements are
 								            :class:`CSS` objects, filenames, URLs, or file-like objects.
-												Rename PDF scale to zoom, have the default be 1 rather than 0.75

The 0.75 factor is an implementation detail that should not be exposed
in the API.

											
										
										
											2012-11-23 01:27:34 +04:00
+								        :type zoom: float
 								        :param zoom:
 								            The zoom factor in PDF units per CSS units.
 								            **Warning**: All CSS units (even physical, like ``cm``)
 								            are affected.
 								            For values other than 1, physical CSS units will thus be “wrong”.
 								            Page size declarations are affected too, even with keyword values
 								            like ``@page { size: A3 landscape; }``
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								        :param attachments: A list of additional file attachments for the
-												Added an `Attachment` class for attachments provided through the API instead of the URL/description tuples

											
										
										
											2014-04-26 01:35:43 +04:00
+								            generated PDF document or :obj:`None`. The list's elements are
 								            :class:`Attachment` objects, filenames, URLs or file-like objects.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        :returns:
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								            The PDF as byte string if :obj:`target` is not provided or
 								            :obj:`None`, otherwise :obj:`None` (the PDF is written to
 								            :obj:`target`.)
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        """
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								        return self.render(stylesheets).write_pdf(target, zoom, attachments)
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												More WIP: cairocffi and pango cffi.

											
										
										
											2012-12-29 04:00:30 +04:00
+								    def write_image_surface(self, stylesheets=None, resolution=96):
 								        surface, _width, _height = (
 								            self.render(stylesheets, enable_hinting=True)
 								            .write_image_surface(resolution))
 								        return surface
-												Generalize hinting and resolution

... in the low-level API, not just PNG output.

											
										
										
											2012-09-20 21:24:58 +04:00
+								    def write_png(self, target=None, stylesheets=None, resolution=96):
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        """Paint the pages vertically to a single PNG image.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        There is no decoration around pages other than those specified in CSS
 								        with ``@page`` rules. The final image is as wide as the widest page.
 								        Each page is below the previous one, centered horizontally.
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								        This is a shortcut for calling :meth:`render`, then
 								        :meth:`Document.write_png() <document.Document.write_png>`.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        :param target:
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								            A filename, file-like object, or :obj:`None`.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        :param stylesheets:
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								            An optional list of user stylesheets. (See
 								            :ref:`stylesheet-origins`\.) The list’s elements are
 								            :class:`CSS` objects, filenames, URLs, or file-like objects.
-												Moar docs for the low-level API.

											
										
										
											2012-09-20 19:21:44 +04:00
+								        :type resolution: float
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
+								        :param resolution:
 								            The output resolution in PNG pixels per CSS inch. At 96 dpi
 								            (the default), PNG pixels match the CSS ``px`` unit.
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        :returns:
-												Docs docs docs.

But no ducks.

											
										
										
											2012-10-08 21:51:18 +04:00
+								            The image as byte string if :obj:`target` is not provided or
 								            :obj:`None`, otherwise :obj:`None` (the image is written to
 								            :obj:`target`.)
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
+								        """
-												Remove with_size in Document.write_png

The method now always returns a tuple with the size.
HTML.write_png however still does not return the size.

											
										
										
											2012-10-05 20:19:17 +04:00
+								        png_bytes, _width, _height = (
-												Move resolution to *.write_png only.

											
										
										
											2012-10-05 22:12:05 +04:00
+								            self.render(stylesheets, enable_hinting=True)
 								            .write_png(target, resolution))
-												Remove with_size in Document.write_png

The method now always returns a tuple with the size.
HTML.write_png however still does not return the size.

											
										
										
											2012-10-05 20:19:17 +04:00
+								        return png_bytes
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												Integrate the sphinx docs with docstrings.

											
										
										
											2012-09-19 19:37:52 +04:00
class CSS(object):
    """Represents a CSS stylesheet parsed by tinycss.

    An instance is created in the same way as :class:`HTML`, except that
    the ``tree`` parameter is not available. All other parameters are the same.

    ``CSS`` objects have no public attribute or method. They are only meant to
    be used in the :meth:`~HTML.write_pdf`, :meth:`~HTML.write_png` and
    :meth:`~HTML.render` methods of :class:`HTML` objects.

    """
    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                 string=None, encoding=None, base_url=None,
                 url_fetcher=default_url_fetcher, _check_mime_type=False,
                 media_type='print'):
        """Select exactly one source, parse it and pre-process its rules.

        Parse errors collected by the parser are logged as warnings.

        """
        selected = _select_source(
            guess, filename, url, file_obj, string, tree=None,
            base_url=base_url, url_fetcher=url_fetcher,
            check_css_mime_type=_check_mime_type,)
        # The source is only valid inside the ``with`` block (files and
        # sockets are closed on exit), so parsing must happen here.
        with selected as (source_type, source, base_url, protocol_encoding):
            if source_type == 'string' and not isinstance(source, bytes):
                # unicode, no encoding
                stylesheet = PARSER.parse_stylesheet(source)
            else:
                if source_type == 'file_obj':
                    source = source.read()
                stylesheet = PARSER.parse_stylesheet_bytes(
                    source, linking_encoding=encoding,
                    protocol_encoding=protocol_encoding)
        # ``base_url`` is now the normalized value yielded by the selector.
        self.base_url = base_url
        self.rules = list(preprocess_stylesheet(
            media_type, base_url, stylesheet.rules, url_fetcher))
        # TODO: do not keep this self.stylesheet around?
        self.stylesheet = stylesheet
        for error in self.stylesheet.errors:
            LOGGER.warning(error)
-												Switch from cssutils to tinycss as the CSS parser

											
										
										
											2012-03-24 16:39:31 +04:00
-												Fix code formatting

											
										
										
											2014-04-27 21:16:14 +04:00
-												Added an `Attachment` class for attachments provided through the API instead of the URL/description tuples

											
										
										
											2014-04-26 01:35:43 +04:00
class Attachment(object):
    """Represents a file attachment for a PDF document.

    An instance is created in the same way as :class:`HTML`, except that
    the HTML specific parameters are not supported. An optional description can
    be provided with the ``description`` parameter.

    :param description: A description of the attachment to be included in the
        PDF document. May be :obj:`None`

    """
    def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                 string=None, base_url=None, url_fetcher=default_url_fetcher,
                 description=None):
        # ``_select_source`` is a context manager: nothing is opened or
        # fetched (nor are the arguments validated) until it is entered.
        self.source = _select_source(
            guess, filename, url, file_obj, string, tree=None,
            base_url=base_url, url_fetcher=url_fetcher)
        self.description = description
-												*Break the API*

The new API should be easier to use. Only what is directly
in the 'weasyprint' module (not in a sub-module) is public.

											
										
										
											2012-02-15 21:49:37 +04:00
-												URL fetching: enforce closing sockets/files with a context manager.

											
										
										
											2013-06-20 15:58:24 +04:00
@contextlib.contextmanager
def _select_source(guess=None, filename=None, url=None, file_obj=None,
                   string=None, tree=None, base_url=None,
                   url_fetcher=default_url_fetcher, check_css_mime_type=False):
    """
    Check that only one input is not None, and return it with the
    normalized ``base_url``.

    This is a context manager. It yields a 4-tuple
    ``(source_type, source, base_url, protocol_encoding)`` where
    ``source_type`` is one of ``'file_obj'``, ``'string'`` or ``'tree'``.
    Files opened and URLs fetched here are closed when the ``with`` block
    exits, so ``source`` must be consumed inside that block.

    :param guess:
        A file-like object (anything with a ``read`` attribute), an absolute
        URL or a filename; the kind is detected in that order.
    :param check_css_mime_type:
        If true, a fetched URL whose MIME type is not ``text/css`` is logged
        as a warning and replaced by an empty string source.
    :raises TypeError:
        If zero or more than one of the sources is given.

    """
    if base_url is not None:
        base_url = ensure_url(base_url)

    nones = [guess is None, filename is None, url is None,
             file_obj is None, string is None, tree is None]
    if nones == [False, True, True, True, True, True]:
        if hasattr(guess, 'read'):
            type_ = 'file_obj'
        elif url_is_absolute(guess):
            type_ = 'url'
        else:
            type_ = 'filename'
        selected = _select_source(
            base_url=base_url, url_fetcher=url_fetcher,
            check_css_mime_type=check_css_mime_type,
            # Use str() to work around http://bugs.python.org/issue4978
            # See https://github.com/Kozea/WeasyPrint/issues/97
            **{str(type_): guess})
        with selected as result:
            yield result
    elif nones == [True, False, True, True, True, True]:
        if base_url is None:
            base_url = path2url(filename)
        with open(filename, 'rb') as file_obj:
            yield 'file_obj', file_obj, base_url, None
    elif nones == [True, True, False, True, True, True]:
        with fetch(url_fetcher, url) as result:
            if check_css_mime_type and result['mime_type'] != 'text/css':
                LOGGER.warning(
                    'Unsupported stylesheet type %s for %s',
                    result['mime_type'], result['redirected_url'])
                yield 'string', '', base_url, None
            else:
                proto_encoding = result.get('encoding')
                if base_url is None:
                    base_url = result.get('redirected_url', url)
                if 'string' in result:
                    yield 'string', result['string'], base_url, proto_encoding
                else:
                    yield (
                        'file_obj', result['file_obj'], base_url,
                        proto_encoding)
    elif nones == [True, True, True, False, True, True]:
        if base_url is None:
            # filesystem file-like objects have a 'name' attribute.
            name = getattr(file_obj, 'name', None)
            # Some streams have a .name like '<stdin>', not a filename.
            # ``name`` may also be an integer file descriptor (e.g. for
            # files created with ``os.fdopen``), which is not a usable
            # filename and has no ``startswith``: leave base_url unset then.
            if (name and hasattr(name, 'startswith') and
                    not name.startswith('<')):
                base_url = ensure_url(name)
        yield 'file_obj', file_obj, base_url, None
    elif nones == [True, True, True, True, False, True]:
        yield 'string', string, base_url, None
    elif nones == [True, True, True, True, True, False]:
        yield 'tree', tree, base_url, None
    else:
        raise TypeError('Expected exactly one source, got ' + (
            ', '.join(
                name for i, name in enumerate(
                    'guess filename url file_obj string tree'.split())
                if not nones[i]
            ) or 'nothing'
        ))
-												Import stuff early, do not try to run --help without importing.

											
										
										
											2013-02-25 18:21:25 +04:00
 								# Work around circular imports.
-												Fix many PEP8 errors and warnings

- Put line breaks after operators (that's OK)
- Don't assign lambdas, use functions (well, that's really explicit in
  PEP8, it's really verbose but why not)
- Put imports at the beginning of the file (only special cases for us)

											
										
										
											2016-01-15 14:47:03 +03:00
+								from .css import PARSER, preprocess_stylesheet  # noqa
 								from .html import find_base_url, HTML5_UA_STYLESHEET, get_html_metadata  # noqa
 								from .document import Document, Page  # noqa