1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00
WeasyPrint/weasyprint/utils.py
2012-03-24 22:10:38 +01:00

163 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf8
"""
weasyprint.utils
----------------
Various utility functions and classes.
:copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from __future__ import division, unicode_literals
import os.path
import io
import base64
from . import VERSION
from .logger import LOGGER
from .compat import (
urljoin, urlparse, unquote_to_bytes, urlopen_contenttype, Request,
parse_email, pathname2url)
# TODO: Most of this module is URL-related. Rename it to weasyprint.urls?
# Value of the ``User-Agent`` header that ``urlopen()`` in this module sends
# with every request that is not a ``data:`` URL. The package ``VERSION`` is
# interpolated into the product token.
HTTP_USER_AGENT = 'WeasyPrint/%s http://weasyprint.org/' % VERSION
def path2url(path):
    """Build a ``file:`` URL for the filesystem location ``path``.

    The path is made absolute with ``os.path.abspath`` before being
    converted, so a relative ``path`` yields an absolute URL.

    :param path: A filesystem path, absolute or relative.
    :returns: The string ``'file:'`` followed by the URL form of the
        absolute path.
    """
    absolute_path = os.path.abspath(path)
    url_path = pathname2url(absolute_path)
    return 'file:' + url_path
def get_url_attribute(element, key):
    """Return the absolute URL held by attribute ``key`` of ``element``.

    The attribute value is stripped of surrounding whitespace and then
    resolved against ``element.base_url``, so relative URLs come back
    absolute.

    :param element: An object offering ``get(key)`` and a ``base_url``
        attribute.
    :param key: Name of the attribute to read.
    :returns: The joined URL, or ``None`` when the attribute is missing,
        empty, or contains only whitespace.
    """
    raw_value = element.get(key)
    if not raw_value:
        return None
    stripped_value = raw_value.strip()
    if not stripped_value:
        return None
    # TODO: support the <base> HTML element, but do not use
    # lxml.html.HtmlElement.make_links_absolute() that changes
    # the tree for content: attr(href)
    return urljoin(element.base_url, stripped_value)
def ensure_url(string):
    """Turn ``string`` into a URL if it is not one already.

    A value in which ``urlparse`` finds a scheme is considered to be a URL
    and is returned untouched. Anything else is treated as a filename: it is
    encoded to UTF-8 and handed to ``path2url``.

    :param string: A URL or a filename, as a text string.
    :returns: ``string`` itself, or the result of ``path2url`` for it.
    """
    has_scheme = bool(urlparse(string).scheme)
    if not has_scheme:
        encoded_path = string.encode('utf8')
        return path2url(encoded_path)
    return string
def decode_base64(data):
    """Decode base64, padding being optional.

    "From a theoretical point of view, the padding character is not needed,
    since the number of missing bytes can be calculated from the number
    of Base64 digits."

    https://en.wikipedia.org/wiki/Base64#Padding

    :param data: Base64 data as an ASCII byte string
    :returns: The decoded byte string.
    """
    # ``-len(data) % 4`` is the number of '=' needed to reach a multiple of
    # four: 0 for input that is already aligned, otherwise 1 to 3.
    # (``4 - len(data) % 4`` is never 0 and would append four useless '='.)
    data += b'=' * (-len(data) % 4)
    # ``base64.decodestring`` was removed in Python 3.9; ``decodebytes`` is
    # the same function under its current name. Python 2 only has the old
    # name, so fall back to it when the new one is absent.
    decode = getattr(base64, 'decodebytes', None) or base64.decodestring
    return decode(data)
def parse_data_url(url):
    """Decode a URL that uses the ``data:`` scheme.

    urllib can handle them in Python 2, but that is broken in Python 3.
    Inspired from the Python 2.7.2's urllib.py.

    :param url: The complete URL, including the leading ``data:``.
    :returns: A ``(file_like, mime_type, charset)`` tuple where
        ``file_like`` is an ``io.BytesIO`` holding the decoded payload.
    :raises IOError: If ``url`` has no comma separating header and data.
    """
    # syntax of data URLs:
    # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data := *urlchar
    # parameter := attribute "=" value
    header, comma, payload = url.partition(',')
    if not comma:
        raise IOError('bad data URL')

    header = header[5:]  # len('data:') == 5
    if not header:
        # Nothing between "data:" and the comma: use the defaults.
        mime_type, charset, encoding = 'text/plain', 'US-ASCII', ''
    else:
        # The last ";"-separated item is an encoding marker only when it
        # is not an ``attribute=value`` parameter.
        content_type, semicolon, last_item = header.rpartition(';')
        if semicolon and '=' not in last_item:
            encoding = last_item
        else:
            content_type, encoding = header, ''
        message = parse_email('Content-type: ' + content_type)
        mime_type = message.get_content_type()
        charset = message.get_content_charset()

    payload = unquote_to_bytes(payload)
    if encoding == 'base64':
        payload = decode_base64(payload)
    return io.BytesIO(payload), mime_type, charset
def urlopen(url):
    """Fetch an URL and return ``(file_like, mime_type, charset)``.

    ``data:`` URLs are decoded locally by ``parse_data_url``. Every other
    URL is wrapped in a ``Request`` carrying the WeasyPrint ``User-Agent``
    header and passed to ``urlopen_contenttype``.

    It is the caller's responsibility to call ``file_like.close()``.
    """
    if not url.startswith('data:'):
        request = Request(url, headers={'User-Agent': HTTP_USER_AGENT})
        return urlopen_contenttype(request)
    return parse_data_url(url)
class cached_property(object):
    """Decorator turning a method into a lazily computed, cached attribute.

    On the first access through an instance the wrapped function is called
    and its result is stored in the instance ``__dict__`` under the
    function's name; later accesses return that stored value without
    calling the function again. Accessing the attribute on the class
    returns the descriptor itself.

    Stolen from Werkzeug:
    https://github.com/mitsuhiko/werkzeug/blob/7b8d887d33/werkzeug/utils.py#L28
    """

    def __init__(self, func):
        self.func = func
        # Copy the identifying metadata of the wrapped function; the name
        # doubles as the key used in the instance ``__dict__``.
        self.__doc__ = func.__doc__
        self.__module__ = func.__module__
        self.__name__ = func.__name__

    def __get__(self, obj, type=None):
        if obj is None:
            # Looked up on the class rather than on an instance.
            return self
        instance_dict = obj.__dict__
        name = self.__name__
        if name not in instance_dict:
            instance_dict[name] = self.func(obj)
        return instance_dict[name]