mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 16:37:47 +03:00
2012-04-03 11:34:29 +02:00

502 lines
18 KiB
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf8
"""
This module takes care of steps 3 and 4 of “CSS 2.1 processing model”:
Retrieve stylesheets associated with a document and annotate every element
with a value for every CSS property.
This module does this in more than two steps. The
:func:`get_all_computed_styles` function does everything, but it itself
calls a function for each step:
Find and parse all author stylesheets in a document.
Resolve @media and @import rules.
Find elements in a document that match a selector list.
Find and parse all `style` HTML attributes.
Remove ignored properties and expand shorthands.
Take applicable properties and only keep those with highest weight.
Handle initial values, inheritance and computed values for one element.
:copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from __future__ import division, unicode_literals
import re
import tinycss
from tinycss.selectors3 import STYLE_ATTRIBUTE_SPECIFICITY
from lxml import cssselect
from . import properties
from . import validation
from . import computed_values
from ..utils import get_url_attribute
from ..logger import LOGGER
from ..compat import iteritems, urljoin
from .. import CSS
# Module-wide tinycss parser, created once with the ``selectors3`` and
# ``page3`` options enabled.
PARSER = tinycss.make_parser(with_selectors3=True, with_page3=True)
# Pseudo-classes and pseudo-elements are the same to lxml.cssselect.parse().
# List the identifiers for all CSS3 pseudo elements here to distinguish them.
PSEUDO_ELEMENTS = ('before', 'after', 'first-line', 'first-letter')
# Selectors for @page rules can have a pseudo-class, one of :first, :left
# or :right. This maps pseudo-classes to lists of "page types" selected.
PAGE_PSEUDOCLASS_TARGETS = {
    'first': ['first_left_page', 'first_right_page'],
    'left': ['left_page', 'first_left_page'],
    'right': ['right_page', 'first_right_page'],
    # no pseudo-class: all pages
    None: ['left_page', 'right_page', 'first_left_page', 'first_right_page'],
}
# A test function that returns True if the given property name has an
# initial value that is not always the same when computed.
class StyleDict(object):
    """A mapping (dict-like) that allows attribute access to values.

    Allow eg. ``style.font_size`` instead of ``style['font-size']``.

    :param data: optional initial values; they are copied into a new dict.
    :param parent: if given, should be a mapping. Values missing from this
                   dict will be looked up in the parent dict. Setting a value
                   in this dict masks any value in the parent.

    """

    # Default values, may be overridden on instances
    anonymous = False

    def __init__(self, data=None, parent=None):
        data = {} if data is None else dict(data)
        if parent is None:
            parent = {}
        # work around our own __setattr__
        object.__setattr__(self, '_storage', data)
        object.__setattr__(self, '_parent', parent)

    def __getitem__(self, key):
        """Return the value for ``key``, falling back to the parent."""
        storage = self._storage
        if key in storage:
            return storage[key]
        else:
            return self._parent[key]

    def __setitem__(self, key, value):
        """Set ``key`` in this dict only; the parent is never modified."""
        self._storage[key] = value

    def updated_copy(self, other):
        """Return a copy of this style with the values of ``other`` applied.

        ``other`` is anything accepted by :meth:`dict.update`. Neither this
        object nor its parent is modified.

        """
        new_style = self.copy()
        new_style._storage.update(other)
        return new_style

    def __contains__(self, key):
        return key in self._parent or key in self._storage

    __getattr__ = __getitem__  # May raise KeyError instead of AttributeError
    __setattr__ = __setitem__

    def copy(self):
        """Copy the ``StyleDict``.

        Create a new StyleDict with this one as the parent. This is a cheap
        "copy-on-write". Modifications in the copy will not affect
        the original, but modifications in the original *may* affect the
        copy.

        """
        if self._storage:
            parent = self
        else:
            # Nothing stored here: skip one level of lookup indirection.
            parent = self._parent
        style = type(self)(parent=parent)
        if self.anonymous:
            # Bypass __setattr__, which would store a style value instead.
            object.__setattr__(style, 'anonymous', True)
        return style

    def inherit_from(self):
        """Return a new StyleDict with inherited properties from this one.

        Non-inherited properties get their initial values.
        This is the styles for an anonymous box.

        """
        style = computed_from_cascaded(
            cascaded={}, parent_style=self,
            # Only used by non-inherited properties. eg `content: attr(href)`
            element=None)
        object.__setattr__(style, 'anonymous', True)
        return style
def find_stylesheets(document, medium):
    """Yield the stylesheets of ``document``.

    The output order is the same as the order of the dom.

    :param document: an object whose ``dom`` attribute is the element tree.
    :param medium: a media type string; elements whose ``media`` attribute
                   does not match it are skipped.

    """
    for element in document.dom.iter():
        if element.tag not in ('style', 'link'):
            continue
        mime_type = element.get('type', 'text/css').split(';', 1)[0].strip()
        # Only keep 'type/subtype' from 'type/subtype ; param1; param2'.
        if mime_type != 'text/css':
            continue
        media_attr = element.get('media', '').strip() or 'all'
        media = [media_type.strip() for media_type in media_attr.split(',')]
        if not evaluate_media_query(media, medium):
            continue
        if element.tag == 'style':
            # Content is text that is directly in the <style> element, not its
            # descendants
            content = [element.text or '']
            for child in element:
                content.append(child.tail or '')
            content = ''.join(content)
            # lxml should give us either unicode or ASCII-only bytestrings, so
            # we don't need `encoding` here.
            css = CSS(string=content, base_url=element.base_url)
            yield css
        elif element.tag == 'link' and element.get('href'):
            rel = element.get('rel', '').split()
            if 'stylesheet' not in rel or 'alternate' in rel:
                continue
            href = get_url_attribute(element, 'href')
            css = CSS(url=href, _check_mime_type=True)
            if css.mime_type == 'text/css':
                yield css
            else:
                LOGGER.warn('Unsupported stylesheet type: %s', css.mime_type)
def find_style_attributes(document):
    """
    Yield ``element, declaration, base_url`` for elements with
    a "style" attribute.

    Elements with a missing or empty ``style`` attribute are skipped.
    Parse errors reported for an attribute are logged as warnings and the
    declarations that were parsed are still yielded.

    """
    for element in document.dom.iter():
        style_attribute = element.get('style')
        if style_attribute:
            declarations, errors = PARSER.parse_style_attr(style_attribute)
            for error in errors:
                LOGGER.warn(error)
            yield element, declarations, element.base_url
def evaluate_media_query(query_list, medium):
    """Return the boolean evaluation of `query_list` for the given `medium`.

    :attr query_list: a container of media type strings
    :attr medium: a media type string (for now)

    """
    # TODO: actual support for media queries, not just media types
    return 'all' in query_list or medium in query_list
def declaration_precedence(origin, importance):
    """Return the precedence for a declaration.

    Precedence values have no meaning unless compared to each other.

    Acceptable values for ``origin`` are the strings ``'author'``, ``'user'``
    and ``'user agent'``. ``importance`` is evaluated for truthiness only.

    """
    # See http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    if origin == 'user agent':
        return 1
    elif origin == 'user' and not importance:
        return 2
    elif origin == 'author' and not importance:
        return 3
    elif origin == 'author':  # and importance
        return 4
    else:
        assert origin == 'user'  # and importance
        return 5
def add_declaration(cascaded_styles, prop_name, prop_values, weight, element,
                    pseudo_type=None):
    """Set the value for a property on a given element.

    The value is only set if there is no value of greater weight defined yet.
    On equal weight the new value replaces the previous one.

    :param cascaded_styles: dict keyed by ``(element, pseudo_type)``; it is
        modified in place.
    :param weight: any value that can be compared with the other weights.

    """
    style = cascaded_styles.setdefault((element, pseudo_type), {})
    _values, previous_weight = style.get(prop_name, (None, None))
    if previous_weight is None or previous_weight <= weight:
        style[prop_name] = prop_values, weight
def set_computed_styles(cascaded_styles, computed_styles,
                        element, parent, pseudo_type=None):
    """Set the computed values of styles to ``element``.

    Take the properties left by ``apply_style_rule`` on an element or
    pseudo-element and assign computed values with respect to the cascade,
    declaration priority (ie. ``!important``) and selector specificity.

    The result is stored in ``computed_styles[element, pseudo_type]``.
    When ``parent`` is not None, ``computed_styles[parent, None]`` must
    already be set.

    """
    if parent is None:
        parent_style = None
    else:
        parent_style = computed_styles[parent, None]
    cascaded = cascaded_styles.get((element, pseudo_type), {})
    style = computed_from_cascaded(
        element, cascaded, parent_style, pseudo_type)
    computed_styles[element, pseudo_type] = style
def computed_from_cascaded(element, cascaded, parent_style, pseudo_type=None):
    """Get a dict of computed style mixed from parent and cascaded styles.

    :param cascaded: dict of property name -> ``(value, weight)``.
    :param parent_style: computed style of the parent, or None when there
        is no parent to inherit from.

    """
    if not cascaded and parent_style is not None:
        # Fast path for anonymous boxes:
        # no cascaded style, only implicitly initial or inherited values.
        computed = StyleDict(parent=properties.INITIAL_VALUES)
        for name in properties.INHERITED:
            computed[name] = parent_style[name]
        # border-*-style is none, so border-width computes to zero.
        # Other than that, properties that would need computing are
        # border-*-color, but they do not apply.
        for side in ('top', 'bottom', 'left', 'right'):
            computed['border_%s_width' % side] = 0
        return computed

    # Handle inheritance and initial values
    specified = StyleDict()
    computed = StyleDict()
    for name, initial in iteritems(properties.INITIAL_VALUES):
        if name in cascaded:
            value, _precedence = cascaded[name]
            keyword = value
        elif name in properties.INHERITED:
            keyword = 'inherit'
        else:
            keyword = 'initial'

        if keyword == 'inherit' and parent_style is None:
            # On the root element, 'inherit' from initial values
            keyword = 'initial'

        if keyword == 'initial':
            value = initial
            # NOTE(review): the comment below reads as if a condition used
            # to guard this assignment (some initial values may not be the
            # same once computed) -- confirm against upstream.
            # The value is the same as when computed
            computed[name] = value
        elif keyword == 'inherit':
            value = parent_style[name]
            # Values in parent_style are already computed.
            computed[name] = value

        assert value is not None
        specified[name] = value

    return computed_values.compute(
        element, pseudo_type, specified, computed, parent_style)
class PageSelector(object):
    """Mimic the API of :class:`tinycss.selectors3.Selector`

    ``match`` ignores its argument and always returns ``matched``.

    """
    def __init__(self, specificity, pseudo_element, matched):
        self.specificity = specificity
        self.pseudo_element = pseudo_element
        self.match = lambda _document: matched
def preprocess_stylesheet(medium, base_url, rules):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Yield ``rule, selector_list, declarations`` tuples. Rules without any
    valid declaration are dropped, ``@import`` and ``@media`` rules that do
    not match ``medium`` are skipped, and named ``@page`` rules are ignored
    with a warning.

    """
    for rule in rules:
        if not rule.at_keyword:
            declarations = list(preprocess_declarations(
                base_url, rule.declarations))
            if declarations:
                yield rule, rule.selector_list, declarations

        elif rule.at_keyword == '@import':
            if not evaluate_media_query(rule.media, medium):
                continue
            for result in CSS(url=urljoin(base_url, rule.uri)).rules:
                yield result

        elif rule.at_keyword == '@media':
            if not evaluate_media_query(rule.media, medium):
                continue
            for result in preprocess_stylesheet(
                    medium, base_url, rule.rules):
                yield result

        elif rule.at_keyword == '@page':
            page_name, pseudo_class = rule.selector
            page_types = PAGE_PSEUDOCLASS_TARGETS[pseudo_class]
            # TODO: support named pages (see CSS3 Paged Media)
            if page_name is not None:
                LOGGER.warn('Named pages are not supported yet, the whole '
                            '@page %s rule was ignored.', page_name + (
                                ':' + pseudo_class if pseudo_class else ''))
                continue
            specificity = rule.specificity
            declarations = list(preprocess_declarations(
                base_url, rule.declarations))
            if declarations:
                selector_list = [PageSelector(specificity, None, page_types)]
                yield rule, selector_list, declarations

            # Nested at-rules of the @page rule: same page types, with the
            # at-keyword of the nested rule used as pseudo-element.
            for margin_rule in rule.at_rules:
                declarations = list(preprocess_declarations(
                    base_url, margin_rule.declarations))
                if declarations:
                    selector_list = [PageSelector(
                        specificity, margin_rule.at_keyword, page_types)]
                    yield margin_rule, selector_list, declarations
def preprocess_declarations(base_url, declarations):
    """Yield ``long_name, values, priority`` for the valid declarations.

    Declarations are walked from last to first and each expanded property
    name is yielded at most once, so only the last valid declaration of a
    property is kept. Dashes in property names are replaced by underscores.

    """
    # set() + reversed(): only keep the last valid declaration,
    # dont bother checking the previous ones for the same property
    seen = set()
    for declaration in reversed(declarations):
        name = declaration.name.replace('-', '_')
        if name in seen:
            # This only helps on non-shorthands, but still
            continue
        priority = declaration.priority
        for long_name, values in validation.validate_and_expand(
                base_url, name, declaration.value):
            if long_name not in seen:
                seen.add(long_name)
                yield long_name, values, priority
def get_all_computed_styles(document, medium,
                            user_stylesheets=None, ua_stylesheets=None):
    """Compute all the computed styles of ``document`` for ``medium``.

    Do everything from finding author stylesheets in the given HTML document
    to parsing and applying them.

    Return a dict of (DOM element, pseudo element type) -> StyleDict instance.

    """
    author_stylesheets = list(find_stylesheets(document, medium))

    # keys: (element, pseudo_element_type)
    #    element: a lxml element object or the '@page' string for @page styles
    #    pseudo_element_type: a string such as 'first' (for @page) or 'after',
    #        or None for normal elements
    # values: dicts of
    #     keys: property name as a string
    #     values: (values, weight)
    #         values: a PropertyValue-like object
    #         weight: values with a greater weight take precedence, see
    #             http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    cascaded_styles = {}

    for sheets, origin in (
        # Order here is not important ('origin' is).
        # Use this order for a regression test
        (ua_stylesheets or [], 'user agent'),
        (author_stylesheets, 'author'),
        (user_stylesheets or [], 'user'),
    ):
        for sheet in sheets:
            for _rule, selector_list, declarations in sheet.rules:
                for selector in selector_list:
                    specificity = selector.specificity
                    pseudo_type = selector.pseudo_element
                    for element in selector.match(document.dom):
                        for name, values, importance in declarations:
                            precedence = declaration_precedence(
                                origin, importance)
                            weight = (precedence, specificity)
                            add_declaration(
                                cascaded_styles, name, values, weight,
                                element, pseudo_type)

    # "style" attributes are author declarations with their own specificity;
    # do not reuse the value left over from the last selector above.
    specificity = STYLE_ATTRIBUTE_SPECIFICITY
    for element, declarations, base_url in find_style_attributes(document):
        for name, values, importance in preprocess_declarations(
                base_url, declarations):
            precedence = declaration_precedence('author', importance)
            weight = (precedence, specificity)
            add_declaration(cascaded_styles, name, values, weight, element)

    # keys: (element, pseudo_element_type), like cascaded_styles
    # values: StyleDict objects:
    #     keys: property name as a string
    #     values: a PropertyValue-like object
    computed_styles = {}

    # First, computed styles for "real" elements *in tree order*
    # Tree order is important so that parents have computed styles before
    # their children, for inheritance.

    # Iterate on all elements, even if there is no cascaded style for them.
    for element in document.dom.iter():
        set_computed_styles(cascaded_styles, computed_styles, element,
                            parent=element.getparent())

    # Then computed styles for @page.

    # Iterate on all possible page types, even if there is no cascaded style
    # for them.
    for page_type in PAGE_PSEUDOCLASS_TARGETS[None]:
        set_computed_styles(
            cascaded_styles, computed_styles, page_type,
            # @page inherits from the root element:
            # http://lists.w3.org/Archives/Public/www-style/2012Jan/1164.html
            parent=document.dom)

    # Then computed styles for pseudo elements, in any order.
    # Pseudo-elements inherit from their associated element so they come
    # last. Do them in a second pass as there is no easy way to iterate
    # on the pseudo-elements for a given element with the current structure
    # of cascaded_styles. (Keys are (element, pseudo_type) tuples.)

    # Only iterate on pseudo-elements that have cascaded styles. (Others
    # might as well not exist.)
    for element, pseudo_type in cascaded_styles:
        if pseudo_type:
            set_computed_styles(
                cascaded_styles, computed_styles,
                element, pseudo_type=pseudo_type,
                # The pseudo-element inherits from the element.
                parent=element)

    return computed_styles