2011-06-30 00:34:01 +04:00
|
|
|
|
# coding: utf8
|
|
|
|
|
|
|
|
|
|
# WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
|
|
|
|
|
# Copyright (C) 2011 Simon Sapin
|
|
|
|
|
#
|
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
2011-08-16 17:11:35 +04:00
|
|
|
|
|
2011-08-19 18:53:05 +04:00
|
|
|
|
"""
|
|
|
|
|
Various utils.
|
|
|
|
|
|
|
|
|
|
"""
|
2011-06-30 00:34:01 +04:00
|
|
|
|
|
2011-08-19 18:53:05 +04:00
|
|
|
|
import urllib
|
|
|
|
|
from urlparse import urljoin, urlparse
|
2011-06-30 00:34:01 +04:00
|
|
|
|
|
2011-08-09 14:45:51 +04:00
|
|
|
|
from cssutils.helper import path2url
|
|
|
|
|
|
2011-10-17 17:04:13 +04:00
|
|
|
|
from . import VERSION
|
|
|
|
|
|
2011-08-09 14:45:51 +04:00
|
|
|
|
|
2011-08-05 13:16:44 +04:00
|
|
|
|
def get_url_attribute(element, key):
    """Return the absolute URL stored in the ``key`` attribute of ``element``.

    The attribute value is stripped of surrounding whitespace and resolved
    against ``element.base_url``, so a relative URL in the element comes
    back absolute. Return ``None`` when the element has no such attribute.

    """
    raw_value = element.get(key)
    if raw_value is not None:
        return urljoin(element.base_url, raw_value.strip())
    return None
|
2011-08-09 14:45:51 +04:00
|
|
|
|
|
2011-08-05 13:16:44 +04:00
|
|
|
|
|
2011-08-19 18:53:05 +04:00
|
|
|
|
def ensure_url(string):
    """Return ``string`` as a ``scheme://path`` URL.

    When ``urlparse`` finds a scheme in ``string`` it is considered to be
    an URL already and is returned unchanged. Anything else is assumed to
    be a filename and is converted to a ``file://`` URL with ``path2url``.

    """
    if urlparse(string).scheme:
        return string
    return path2url(string)
|
2011-08-16 17:11:35 +04:00
|
|
|
|
|
|
|
|
|
|
2011-10-17 17:04:13 +04:00
|
|
|
|
class URLopener(urllib.FancyURLopener):
    """``FancyURLopener`` subclass that identifies itself as WeasyPrint."""

    # Overrides the opener's ``version`` attribute, which is used as the
    # User-Agent.
    version = 'WeasyPrint/%s http://weasyprint.org/' % VERSION
|
2011-08-19 18:53:05 +04:00
|
|
|
|
|
|
|
|
|
|
2011-10-17 17:04:13 +04:00
|
|
|
|
def urlopen(url):
    """Open ``url`` and return a ``(file_like, mime_type, charset)`` tuple.

    ``mime_type`` and ``charset`` are read from the response headers.
    It is the caller's responsibility to call ``file_like.close()``.

    """
    response = URLopener().open(url)
    headers = response.info()
    # The header object has a different API depending on the Python version:
    # ``get_content_type``/``get_param`` on Python 3,
    # ``gettype``/``getparam`` on Python 2.
    mime_type = (
        headers.get_content_type()
        if hasattr(headers, 'get_content_type')
        else headers.gettype())
    charset = (
        headers.get_param('charset')
        if hasattr(headers, 'get_param')
        else headers.getparam('charset'))
    return response.fp, mime_type, charset
|
2011-10-17 17:04:13 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def urllib_fetcher(url):
    """URL fetcher for cssutils.

    This fetcher is based on urllib instead of urllib2, since urllib has
    support for the "data" URL scheme.

    Return ``(charset, content)`` when the resource is served as
    ``text/css``, and ``None`` for any other MIME type. The underlying
    file-like object is always closed before returning, whether the content
    was read, rejected because of its MIME type, or reading failed.

    """
    file_like, mime_type, charset = urlopen(url)
    try:
        if mime_type != 'text/css':
            # TODO: add a warning
            return None
        content = file_like.read()
    finally:
        # Close on every path: the early return above used to leak the
        # open file-like object.
        file_like.close()
    return charset, content
|