1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 16:37:47 +03:00
WeasyPrint/weasyprint/css/__init__.py

500 lines
18 KiB
Python
Raw Normal View History

# coding: utf8
"""
weasyprint.css
--------------
This module takes care of steps 3 and 4 of CSS 2.1 processing model:
Retrieve stylesheets associated with a document and annotate every element
with a value for every CSS property.
http://www.w3.org/TR/CSS21/intro.html#processing-model
This module does this in more than two steps. The
:func:`get_all_computed_styles` function does everything, but it itself
calls a function for each step:
``find_stylesheets``
Find and parse all author stylesheets in a document.
``effective_rules``
Resolve @media and @import rules.
``match_selectors``
Find elements in a document that match a selector list.
``find_style_attributes``
Find and parse all `style` HTML attributes.
``effective_declarations``
Remove ignored properties and expand shorthands.
``add_property``
Take applicable properties and only keep those with highest weight.
``set_computed_styles``
Handle initial values, inheritance and computed values for one element.
:copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
2011-08-22 20:33:54 +04:00
"""
2011-05-12 18:06:47 +04:00
from __future__ import division, unicode_literals
2011-12-16 20:53:11 +04:00
import re
import tinycss
2012-04-25 21:05:42 +04:00
import cssselect
import lxml.etree
2011-04-26 20:07:19 +04:00
from . import properties
from . import computed_values
2012-04-05 18:31:32 +04:00
from .validation import preprocess_declarations
2012-05-22 19:44:16 +04:00
from ..urls import get_url_attribute
from ..logger import LOGGER
from ..compat import iteritems, urljoin
from .. import CSS
2011-05-12 18:06:47 +04:00
2012-04-25 21:05:42 +04:00
# Shared tinycss parser built by ``make_parser('page3')``; reused by
# ``find_style_attributes`` to parse ``style`` attributes.
PARSER = tinycss.make_parser('page3')
2012-04-25 21:05:42 +04:00
# Pseudo-elements accepted in selectors (``None`` means "no pseudo-element").
# A selector using anything that is not listed here is rejected.
PSEUDO_ELEMENTS = (None, 'before', 'after', 'first-line', 'first-letter')

# An @page selector may carry a single pseudo-class: :first, :left or :right.
# Each pseudo-class is mapped to the list of "page types" that it selects.
PAGE_PSEUDOCLASS_TARGETS = {
    'first': ['first_left_page', 'first_right_page'],
    'left': ['left_page', 'first_left_page'],
    'right': ['right_page', 'first_right_page'],
    # Without a pseudo-class, every page type is selected.
    None: ['left_page', 'right_page', 'first_left_page', 'first_right_page'],
}

# Predicate taking a property name. The result is truthy when the initial
# value of that property is not always the same once computed.
RE_INITIAL_NOT_COMPUTED = re.compile(
    '^(display|border_[a-z]+_(width|color))$').match


class StyleDict(object):
    """A mapping (dict-like) that allows attribute access to values.

    Allow eg. ``style.font_size`` instead of ``style['font_size']``.

    :param data: if given, a mapping (or iterable of pairs) that is copied
                 into the own storage of this dict.
    :param parent: if given, should be a mapping. Values missing from this
                   dict will be looked up in the parent dict. Setting a value
                   in this dict masks any value in the parent.

    """

    # Default value, may be overridden on instances (through
    # ``object.__setattr__``, as plain attribute assignment stores an item).
    anonymous = False

    def __init__(self, data=None, parent=None):
        if data is None:
            data = {}
        else:
            data = dict(data)
        if parent is None:
            parent = {}
        # work around our own __setattr__
        object.__setattr__(self, '_storage', data)
        object.__setattr__(self, '_parent', parent)

    def __getitem__(self, key):
        """Return the own value for ``key``, falling back to the parent."""
        storage = self._storage
        if key in storage:
            return storage[key]
        else:
            return self._parent[key]

    def __setitem__(self, key, value):
        """Store ``value`` in this dict only; the parent is never modified."""
        self._storage[key] = value

    def __contains__(self, key):
        return key in self._parent or key in self._storage

    def __getattr__(self, name):
        """Attribute-style access to values: ``style.name``.

        Only called when normal attribute lookup fails. A missing property
        raises :exc:`KeyError` (not :exc:`AttributeError`), as it always did.

        Names starting with an underscore are never property names. They are
        private or special attributes probed by ``hasattr``, ``copy`` or
        ``pickle``, and those callers expect :exc:`AttributeError`. Raising
        it also avoids infinite recursion when ``_storage`` itself is not
        set yet, eg. on an instance created without ``__init__``.

        """
        if name.startswith('_'):
            raise AttributeError(name)
        return self[name]

    __setattr__ = __setitem__

    def updated_copy(self, other):
        """Return a copy of this dict updated with the items of ``other``."""
        copy = self.copy()
        copy._storage.update(other)
        return copy

    def copy(self):
        """Copy the ``StyleDict``.

        Create a new StyleDict with this one as the parent. This is a cheap
        "copy-on-write". Modifications in the copy will not affect
        the original, but modifications in the original *may* affect the
        copy.

        """
        if self._storage:
            parent = self
        else:
            # Nothing of our own to mask: share our parent directly.
            parent = self._parent
        style = type(self)(parent=parent)
        if self.anonymous:
            object.__setattr__(style, 'anonymous', True)
        return style

    def inherit_from(self):
        """Return a new StyleDict with inherited properties from this one.

        Non-inherited properties get their initial values.
        This is the styles for an anonymous box.

        """
        style = computed_from_cascaded(
            cascaded={}, parent_style=self,
            # Only used by non-inherited properties. eg `content: attr(href)`
            element=None)
        object.__setattr__(style, 'anonymous', True)
        return style


2012-07-12 17:21:37 +04:00
def find_stylesheets(element_tree, medium):
    """Yield the stylesheets in ``element_tree``.

    Stylesheets come from ``<style>`` elements and from ``<link>`` elements
    that have an ``href`` and a ``rel`` containing ``stylesheet`` but not
    ``alternate``. Elements whose ``type`` is not ``text/css`` or whose
    ``media`` attribute does not match ``medium`` are skipped.

    The output order is the same as the source order.

    """
    for element in element_tree.iter():
        tag = element.tag
        if tag not in ('style', 'link'):
            continue

        # Only keep 'type/subtype' from 'type/subtype ; param1; param2'.
        raw_type = element.get('type', 'text/css')
        mime_type = raw_type.split(';', 1)[0].strip()
        if mime_type != 'text/css':
            continue

        # A missing or blank media attribute means 'all'.
        media_attr = element.get('media', '').strip() or 'all'
        media = [media_type.strip() for media_type in media_attr.split(',')]
        if not evaluate_media_query(media, medium):
            continue

        if tag == 'style':
            # Content is text that is directly in the <style> element, not
            # its descendants: the element's own text, then the tail of
            # each child.
            pieces = [element.text or '']
            pieces.extend(child.tail or '' for child in element)
            # lxml should give us either unicode or ASCII-only bytestrings,
            # so we don't need `encoding` here.
            yield CSS(string=''.join(pieces), base_url=element.base_url)
        elif tag == 'link' and element.get('href'):
            rel = element.get('rel', '').split()
            if 'stylesheet' in rel and 'alternate' not in rel:
                href = get_url_attribute(element, 'href')
                yield CSS(url=href, _check_mime_type=True)


2011-04-27 19:50:12 +04:00
2012-07-12 17:21:37 +04:00
def find_style_attributes(element_tree):
    """Yield ``element, declarations, base_url`` for each element of
    ``element_tree`` that has a non-empty "style" attribute.

    Parse errors reported for an attribute are logged as warnings.

    """
    parse_style_attr = PARSER.parse_style_attr
    for element in element_tree.iter():
        style_attribute = element.get('style')
        if not style_attribute:
            continue
        declarations, errors = parse_style_attr(style_attribute)
        for error in errors:
            LOGGER.warn(error)
        yield element, declarations, element.base_url


2011-04-27 19:50:12 +04:00
def evaluate_media_query(query_list, medium):
    """Return the boolean evaluation of `query_list` for the given `medium`.

    The result is true when `query_list` contains either ``'all'`` or
    `medium` itself.

    :param query_list: a collection of media type strings
    :param medium: a media type string (for now)

    """
    # TODO: actual support for media queries, not just media types
    return any(candidate in query_list for candidate in ('all', medium))


2011-04-27 19:50:12 +04:00
def declaration_precedence(origin, importance):
    """Return the precedence for a declaration.

    Precedence values have no meaning unless compared to each other.

    Acceptable values for ``origin`` are the strings ``'author'``, ``'user'``
    and ``'user agent'``. ``importance`` is only tested for truthiness: a
    true value marks an ``!important`` declaration.

    :raises ValueError: if ``origin`` is not one of the acceptable values.

    """
    # See http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    if origin == 'user agent':
        return 1
    elif origin == 'user' and not importance:
        return 2
    elif origin == 'author' and not importance:
        return 3
    elif origin == 'author':  # and importance
        return 4
    elif origin == 'user':  # and importance
        return 5
    # An explicit error rather than an ``assert``: assertions are stripped
    # with ``python -O``, which would silently give unknown origins the
    # highest precedence.
    raise ValueError('Unknown stylesheet origin: %r' % (origin,))


2011-05-12 18:06:47 +04:00
def add_declaration(cascaded_styles, prop_name, prop_values, weight, element,
                    pseudo_type=None):
    """Set the value for a property on a given element.

    ``cascaded_styles`` maps ``(element, pseudo_type)`` keys to dicts of
    ``prop_name -> (prop_values, weight)`` and is modified in place.

    The value is only set if there is no value of greater weight defined yet:
    with equal weights, the latest declaration wins.

    """
    per_element = cascaded_styles.setdefault((element, pseudo_type), {})
    existing = per_element.get(prop_name)
    existing_weight = None if existing is None else existing[1]
    if existing_weight is None or existing_weight <= weight:
        per_element[prop_name] = (prop_values, weight)


def set_computed_styles(cascaded_styles, computed_styles,
                        element, parent, pseudo_type=None):
    """Set the computed values of styles to ``element``.

    Take the cascaded style stored for ``(element, pseudo_type)`` (an empty
    one if there is none), turn it into a computed style and store the
    result in ``computed_styles`` under the same key.

    Values are inherited from the computed style of ``parent``, which must
    already be in ``computed_styles``, unless ``parent`` is ``None``.

    """
    parent_style = None if parent is None else computed_styles[parent, None]
    key = (element, pseudo_type)
    computed_styles[key] = computed_from_cascaded(
        element, cascaded_styles.get(key, {}), parent_style, pseudo_type)


2011-05-12 18:06:47 +04:00
def computed_from_cascaded(element, cascaded, parent_style, pseudo_type=None):
    """Get a dict of computed style mixed from parent and cascaded styles.

    ``cascaded`` maps property names to ``(value, weight)`` pairs.
    ``parent_style`` is the computed style to inherit from, or ``None``
    when there is nothing to inherit from (the root element).

    """
    if not cascaded and parent_style is not None:
        # Fast path for anonymous boxes:
        # no cascaded style, only implicitly initial or inherited values.
        computed = StyleDict(parent=properties.INITIAL_VALUES)
        for name in properties.INHERITED:
            computed[name] = parent_style[name]
        # border-*-style is none, so border-width computes to zero.
        # Other than that, properties that would need computing are
        # border-*-color, but they do not apply.
        for side in ('top', 'bottom', 'left', 'right'):
            computed['border_%s_width' % side] = 0
        return computed

    # Handle inheritance and initial values
    is_root = parent_style is None
    specified = StyleDict()
    computed = StyleDict()
    for name, initial in iteritems(properties.INITIAL_VALUES):
        if name in cascaded:
            value, _weight = cascaded[name]
            # The cascaded value may itself be one of the two keywords.
            keyword = value
        elif name in properties.INHERITED:
            keyword = 'inherit'
        else:
            keyword = 'initial'

        if is_root and keyword == 'inherit':
            # On the root element, 'inherit' from initial values
            keyword = 'initial'

        if keyword == 'initial':
            value = initial
            if not RE_INITIAL_NOT_COMPUTED(name):
                # The value is the same as when computed
                computed[name] = value
        elif keyword == 'inherit':
            # Values in parent_style are already computed.
            value = parent_style[name]
            computed[name] = value

        specified[name] = value

    return computed_values.compute(
        element, pseudo_type, specified, computed, parent_style)


2011-05-12 18:06:47 +04:00
2012-04-25 21:05:42 +04:00
class Selector(object):
    """A preprocessed selector, as yielded by ``preprocess_stylesheet``.

    :param specificity: the specificity of the selector, used as part of
                        the weight of its declarations
    :param pseudo_element: the pseudo-element (or margin at-keyword) that
                           is targeted, or ``None``
    :param match: a callable that takes the document and returns the
                  matching elements

    """
    def __init__(self, specificity, pseudo_element, match):
        self.match = match
        self.pseudo_element = pseudo_element
        self.specificity = specificity


2012-03-25 04:41:02 +04:00
def preprocess_stylesheet(medium, base_url, rules):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Yield ``rule, selector_list, declarations`` tuples, where
    ``selector_list`` is a list of :class:`Selector` objects. @import and
    @media rules that match ``medium`` are resolved recursively. Rules
    without any valid declaration, rules with an invalid or unsupported
    selector and @page rules for named pages are skipped.

    """
    def constant_match(page_types):
        # A closure that holds page_types, whatever the document is.
        return lambda _document: page_types

    selector_to_xpath = cssselect.HTMLTranslator().selector_to_xpath

    for rule in rules:
        at_keyword = rule.at_keyword

        if not at_keyword:
            # Normal rule set.
            declarations = list(preprocess_declarations(
                base_url, rule.declarations))
            if not declarations:
                continue
            selector_string = rule.selector.as_css()
            try:
                selector_list = []
                for parsed in cssselect.parse(selector_string):
                    selector_list.append(Selector(
                        (0,) + parsed.specificity(),
                        parsed.pseudo_element,
                        lxml.etree.XPath(selector_to_xpath(parsed))))
                for selector in selector_list:
                    if selector.pseudo_element not in PSEUDO_ELEMENTS:
                        raise cssselect.ExpressionError(
                            'Unknown pseudo-element: %s'
                            % selector.pseudo_element)
            except cssselect.SelectorError as exc:
                LOGGER.warn("Invalid or unsupported selector '%s', %s",
                            selector_string, exc)
                continue
            yield rule, selector_list, declarations

        elif at_keyword == '@import':
            if evaluate_media_query(rule.media, medium):
                imported = CSS(url=urljoin(base_url, rule.uri))
                for result in imported.rules:
                    yield result

        elif at_keyword == '@media':
            if evaluate_media_query(rule.media, medium):
                for result in preprocess_stylesheet(
                        medium, base_url, rule.rules):
                    yield result

        elif at_keyword == '@page':
            page_name, pseudo_class = rule.selector
            # TODO: support named pages (see CSS3 Paged Media)
            if page_name is not None:
                LOGGER.warn('Named pages are not supported yet, the whole '
                            '@page %s rule was ignored.', page_name + (
                                ':' + pseudo_class if pseudo_class else ''))
                continue
            declarations = list(preprocess_declarations(
                base_url, rule.declarations))

            # The page rule and its margin rules share the same matcher
            # and the same specificity.
            match = constant_match(PAGE_PSEUDOCLASS_TARGETS[pseudo_class])
            specificity = rule.specificity

            if declarations:
                yield rule, [Selector(specificity, None, match)], declarations

            for margin_rule in rule.at_rules:
                declarations = list(preprocess_declarations(
                    base_url, margin_rule.declarations))
                if declarations:
                    selector_list = [Selector(
                        specificity, margin_rule.at_keyword, match)]
                    yield margin_rule, selector_list, declarations


2012-07-12 17:21:37 +04:00
def get_all_computed_styles(element_tree, medium,
                            user_stylesheets=None, ua_stylesheets=None):
    """Compute all the computed styles of all elements in ``element_tree``
    for the media type ``medium``.

    Do everything from finding author stylesheets in the given HTML document
    to parsing and applying them.

    Return a dict of (element, pseudo element type) -> StyleDict instance.

    """
    author_stylesheets = list(find_stylesheets(element_tree, medium))

    # keys: (element, pseudo_element_type)
    #    element: a lxml element object, or a page type string such as
    #             'left_page' for @page styles
    #    pseudo_element_type: a string such as 'after', or the at-keyword
    #                         of a margin rule inside @page,
    #                         or None for normal elements
    # values: dicts of
    #     keys: property name as a string
    #     values: (values, weight)
    #         values: a PropertyValue-like object
    #         weight: values with a greater weight take precedence, see
    #             http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    cascaded_styles = {}

    # Order here is not important ('origin' is).
    # Use this order for a regression test
    sheets_by_origin = (
        (ua_stylesheets or [], 'user agent'),
        (author_stylesheets, 'author'),
        (user_stylesheets or [], 'user'),
    )
    for sheets, origin in sheets_by_origin:
        for sheet in sheets:
            for _rule, selector_list, declarations in sheet.rules:
                for selector in selector_list:
                    specificity = selector.specificity
                    pseudo_type = selector.pseudo_element
                    for element in selector.match(element_tree):
                        for name, values, importance in declarations:
                            weight = (
                                declaration_precedence(origin, importance),
                                specificity)
                            add_declaration(
                                cascaded_styles, name, values, weight,
                                element, pseudo_type)

    # Declarations from "style" attributes have the author origin and a
    # fixed specificity.
    specificity = (1, 0, 0, 0)
    for element, declarations, base_url in find_style_attributes(
            element_tree):
        for name, values, importance in preprocess_declarations(
                base_url, declarations):
            weight = (declaration_precedence('author', importance),
                      specificity)
            add_declaration(cascaded_styles, name, values, weight, element)

    # keys: (element, pseudo_element_type), like cascaded_styles
    # values: StyleDict objects:
    #     keys: property name as a string
    #     values: a PropertyValue-like object
    computed_styles = {}

    # First, computed styles for "real" elements *in tree order*
    # Tree order is important so that parents have computed styles before
    # their children, for inheritance.
    # Iterate on all elements, even if there is no cascaded style for them.
    for element in element_tree.iter():
        set_computed_styles(cascaded_styles, computed_styles, element,
                            parent=element.getparent())

    # Then computed styles for @page.
    # Iterate on all possible page types, even if there is no cascaded style
    # for them.
    for page_type in PAGE_PSEUDOCLASS_TARGETS[None]:
        # @page inherits from the root element:
        # http://lists.w3.org/Archives/Public/www-style/2012Jan/1164.html
        set_computed_styles(cascaded_styles, computed_styles, page_type,
                            parent=element_tree)

    # Then computed styles for pseudo elements, in any order.
    # Pseudo-elements inherit from their associated element so they come
    # last. Do them in a second pass as there is no easy way to iterate
    # on the pseudo-elements for a given element with the current structure
    # of cascaded_styles. (Keys are (element, pseudo_type) tuples.)
    # Only iterate on pseudo-elements that have cascaded styles. (Others
    # might as well not exist.)
    for element, pseudo_type in cascaded_styles:
        if not pseudo_type:
            continue
        # The pseudo-element inherits from the element.
        set_computed_styles(cascaded_styles, computed_styles,
                            element, pseudo_type=pseudo_type,
                            parent=element)

    return computed_styles