2011-06-30 00:34:01 +04:00
|
|
|
|
# coding: utf8
|
|
|
|
|
|
|
|
|
|
# WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
|
|
|
|
|
# Copyright (C) 2011 Simon Sapin
|
|
|
|
|
#
|
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
2011-08-16 17:11:35 +04:00
|
|
|
|
|
2011-08-19 18:53:05 +04:00
|
|
|
|
"""
|
|
|
|
|
Various utils.
|
|
|
|
|
|
|
|
|
|
"""
|
2011-06-30 00:34:01 +04:00
|
|
|
|
|
2012-02-17 21:49:58 +04:00
|
|
|
|
from __future__ import division, unicode_literals
|
|
|
|
|
|
|
|
|
|
import io
|
|
|
|
|
import base64
|
2011-06-30 00:34:01 +04:00
|
|
|
|
|
2011-08-09 14:45:51 +04:00
|
|
|
|
from cssutils.helper import path2url
|
|
|
|
|
|
2011-10-17 17:04:13 +04:00
|
|
|
|
from . import VERSION
|
2012-02-22 20:12:40 +04:00
|
|
|
|
from .logger import LOGGER
|
2012-02-17 21:49:58 +04:00
|
|
|
|
from .compat import (
|
|
|
|
|
urljoin, urlparse, unquote_to_bytes, urlopen_contenttype, Request,
|
|
|
|
|
parse_email)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Value of the ``User-Agent`` header sent with the requests made by
# ``urlopen()`` below; it embeds the package version.
HTTP_USER_AGENT = 'WeasyPrint/%s http://weasyprint.org/' % VERSION
|
2011-12-16 15:19:10 +04:00
|
|
|
|
|
|
|
|
|
|
2011-08-05 13:16:44 +04:00
|
|
|
|
def get_url_attribute(element, key):
    """Return the absolute URL held by the ``key`` attribute of ``element``.

    The attribute value is stripped of surrounding whitespace and resolved
    against ``element.base_url``, so a relative URL in the document comes
    back absolute.

    Return ``None`` when the attribute is missing, empty, or contains only
    whitespace.

    """
    raw = element.get(key)
    if not raw:
        return None
    stripped = raw.strip()
    if not stripped:
        return None
    return urljoin(element.base_url, stripped)
|
2011-08-09 14:45:51 +04:00
|
|
|
|
|
2011-08-05 13:16:44 +04:00
|
|
|
|
|
2011-08-19 18:53:05 +04:00
|
|
|
|
def ensure_url(string):
    """Turn ``string`` into a ``scheme://path`` URL.

    A string that already parses with a URL scheme is handed back untouched.
    Anything else is taken to be a filename: it is encoded to UTF-8 and
    converted to a ``file://`` URL with cssutils' ``path2url``.

    """
    has_scheme = bool(urlparse(string).scheme)
    if not has_scheme:
        return path2url(string.encode('utf8'))
    return string
|
2011-08-16 17:11:35 +04:00
|
|
|
|
|
|
|
|
|
|
2012-02-17 21:49:58 +04:00
|
|
|
|
def parse_data_url(url):
    """Decode an URL that uses the ``data`` scheme.

    urllib can handle them in Python 2, but that is broken in Python 3.

    Inspired from the Python 2.7.2's urllib.py.

    :param url: the whole URL, including its leading ``data:``.
    :returns:
        a ``(file_like, mime_type, charset)`` tuple. ``file_like`` is an
        ``io.BytesIO`` over the decoded payload. ``mime_type`` and
        ``charset`` are whatever the parsed ``Content-type`` header reports,
        or ``'text/plain'`` and ``'US-ASCII'`` when the URL has no header.
    :raises IOError: if the URL has no comma separating header and data.

    """
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    try:
        header, data = url.split(',', 1)
    except ValueError:
        raise IOError('bad data URL')
    header = header[5:]  # len('data:') == 5
    if header:
        # The last ";"-separated item is the encoding only if it is not an
        # ``attribute=value`` parameter of the media type.
        semi = header.rfind(';')
        if semi >= 0 and '=' not in header[semi:]:
            content_type = header[:semi]
            encoding = header[semi + 1:]
        else:
            content_type = header
            encoding = ''
        # Let the email parser deal with media type parameters.
        message = parse_email('Content-type: ' + content_type)
        mime_type = message.get_content_type()
        charset = message.get_content_charset()
    else:
        mime_type = 'text/plain'
        charset = 'US-ASCII'
        encoding = ''

    if encoding == 'base64':
        # ``base64.decodestring`` was removed in Python 3.9;
        # ``b64decode`` is available on both Python 2 and Python 3.
        data = base64.b64decode(data.encode('ascii'))
    else:
        data = unquote_to_bytes(data)

    return io.BytesIO(data), mime_type, charset
|
2011-08-19 18:53:05 +04:00
|
|
|
|
|
|
|
|
|
|
2011-10-17 17:04:13 +04:00
|
|
|
|
def urlopen(url):
    """Open ``url`` and return a ``(file_like, mime_type, charset)`` tuple.

    ``data:`` URLs are decoded locally by ``parse_data_url()``. Every other
    URL is requested through ``urlopen_contenttype()`` with the WeasyPrint
    ``User-Agent`` header.

    It is the caller's responsibility to call ``file_like.close()``.

    """
    if not url.startswith('data:'):
        request = Request(url, headers={'User-Agent': HTTP_USER_AGENT})
        return urlopen_contenttype(request)
    return parse_data_url(url)
|
2011-10-17 17:04:13 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def urllib_fetcher(url):
    """URL fetcher for cssutils.

    The resource is opened with this module's ``urlopen()``, so ``data:``
    URLs are supported as well.

    :param url: the URL of the stylesheet to fetch.
    :returns:
        a ``(charset, content)`` tuple, or ``None`` (after logging a
        warning) if the resource is not served as ``text/css``.

    """
    file_like, mime_type, charset = urlopen(url)
    # Close the resource on every path, including the early return and a
    # failing ``read()``.
    try:
        if mime_type != 'text/css':
            # ``warning`` rather than the deprecated ``warn`` alias.
            LOGGER.warning(
                'Expected `text/css` for stylsheet at %s, got `%s`',
                url, mime_type)
            return None
        content = file_like.read()
    finally:
        file_like.close()
    return charset, content
|
2012-02-22 18:52:49 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class cached_property(object):
    """Decorator turning a method into a lazily computed, cached property.

    The wrapped function runs on the first access for a given instance. Its
    result is stored in the instance ``__dict__`` under the function's name
    and returned as-is on later accesses.

    Stolen from Werkzeug:
    https://github.com/mitsuhiko/werkzeug/blob/7b8d887d33/werkzeug/utils.py#L28

    """

    def __init__(self, func):
        self.func = func
        # Mirror the metadata of the wrapped function; ``__name__`` is also
        # the key used to cache the value on the instance.
        self.__doc__ = func.__doc__
        self.__module__ = func.__module__
        self.__name__ = func.__name__

    def __get__(self, obj, type=None):
        # Accessed on the class rather than on an instance: return the
        # descriptor itself.
        if obj is None:
            return self
        cache = obj.__dict__
        name = self.__name__
        if name not in cache:
            cache[name] = self.func(obj)
        return cache[name]
|