2012-03-22 02:19:27 +04:00
|
|
|
|
"""
|
|
|
|
|
weasyprint.css
|
|
|
|
|
--------------
|
2011-04-28 21:15:30 +04:00
|
|
|
|
|
2012-03-22 02:19:27 +04:00
|
|
|
|
This module takes care of steps 3 and 4 of “CSS 2.1 processing model”:
|
|
|
|
|
Retrieve stylesheets associated with a document and annotate every element
|
|
|
|
|
with a value for every CSS property.
|
2011-04-28 21:15:30 +04:00
|
|
|
|
|
2012-03-22 02:19:27 +04:00
|
|
|
|
http://www.w3.org/TR/CSS21/intro.html#processing-model
|
|
|
|
|
|
|
|
|
|
This module does this in more than two steps. The
|
2012-10-09 19:10:45 +04:00
|
|
|
|
:func:`get_all_computed_styles` function does everything, but it is itself
|
|
|
|
|
based on other functions in this module.
|
2012-03-22 02:19:27 +04:00
|
|
|
|
|
2018-09-20 19:41:37 +03:00
|
|
|
|
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
|
2012-03-22 02:19:27 +04:00
|
|
|
|
:license: BSD, see LICENSE for details.
|
2011-08-22 20:33:54 +04:00
|
|
|
|
|
2011-05-04 13:47:04 +04:00
|
|
|
|
"""
|
2011-05-12 18:06:47 +04:00
|
|
|
|
|
2017-07-18 00:40:59 +03:00
|
|
|
|
from collections import namedtuple
|
2019-01-04 03:22:49 +03:00
|
|
|
|
from logging import DEBUG, WARNING
|
2011-08-16 01:22:16 +04:00
|
|
|
|
|
2017-06-30 18:54:02 +03:00
|
|
|
|
import cssselect2
|
2017-03-26 12:42:50 +03:00
|
|
|
|
import tinycss2
|
2011-04-26 20:07:19 +04:00
|
|
|
|
|
2011-05-10 12:50:04 +04:00
|
|
|
|
from . import computed_values
|
2018-04-14 14:44:10 +03:00
|
|
|
|
from . import properties
|
2017-10-13 10:48:57 +03:00
|
|
|
|
from .properties import INITIAL_NOT_COMPUTED
|
2018-04-15 11:28:14 +03:00
|
|
|
|
from .utils import remove_whitespace, split_on_comma
|
2018-03-31 23:13:08 +03:00
|
|
|
|
from .validation import preprocess_declarations
|
|
|
|
|
from .validation.descriptors import preprocess_descriptors
|
2019-01-04 01:02:44 +03:00
|
|
|
|
from ..logger import LOGGER, PROGRESS_LOGGER
|
2017-07-13 18:39:29 +03:00
|
|
|
|
from ..urls import get_url_attribute, url_join, URLFetchingError
|
2012-03-24 16:39:31 +04:00
|
|
|
|
from .. import CSS
|
2011-05-12 18:06:47 +04:00
|
|
|
|
|
|
|
|
|
|
2012-04-25 21:05:42 +04:00
|
|
|
|
# Reject anything not in here:
|
|
|
|
|
# Pseudo-element names accepted by this module. ``None`` presumably stands for
# "no pseudo-element" (the element itself) -- TODO confirm against callers.
PSEUDO_ELEMENTS = (None, 'before', 'after', 'first-line', 'first-letter')
|
2011-08-15 15:03:09 +04:00
|
|
|
|
|
|
|
|
|
|
2017-07-18 00:40:59 +03:00
|
|
|
|
# Description of a kind of page, as produced by ``matching_page_types``.
# The fields appear to mirror the page selector data: ``side`` ('left',
# 'right' or None), ``blank`` and ``first`` (booleans) and ``name`` (page name
# string or None) -- NOTE(review): confirm against the page selector parser.
PageType = namedtuple('PageType', ['side', 'blank', 'first', 'name'])
|
|
|
|
|
|
|
|
|
|
|
2013-07-14 15:08:02 +04:00
|
|
|
|
def get_child_text(element):
    """Concatenate the text nodes that are direct children of ``element``.

    The element's own leading ``text`` comes first, followed by the ``tail``
    of each direct child in document order. Text located inside the children
    themselves (their descendants) is not included.

    """
    pieces = [element.text] if element.text else []
    pieces.extend(child.tail for child in element if child.tail)
    return ''.join(pieces)
|
|
|
|
|
|
|
|
|
|
|
2017-07-07 12:14:07 +03:00
|
|
|
|
def find_stylesheets(wrapper_element, device_media_type, url_fetcher, base_url,
                     font_config, page_rules):
    """Yield the stylesheets found in ``wrapper_element``.

    Every ``<style>`` and ``<link>`` element returned by
    ``wrapper_element.query_all('style', 'link')`` is considered. An element
    is skipped when its ``type`` attribute is not ``text/css`` (compared
    case-insensitively, parameters ignored) or when its ``media`` attribute
    does not match ``device_media_type``.

    ``<style>`` elements yield a :class:`CSS` built from their direct text
    content. ``<link>`` elements yield a :class:`CSS` built from their
    ``href`` URL, but only if they are non-alternate stylesheet links. A
    stylesheet that cannot be fetched is logged and skipped.

    The output order is the same as the source order.

    """
    from ..html import element_has_link_type  # Work around circular imports.

    for wrapper in wrapper_element.query_all('style', 'link'):
        element = wrapper.etree_element
        # Only keep 'type/subtype' from 'type/subtype ; param1; param2'.
        mime_type = element.get('type', 'text/css').split(';', 1)[0].strip()
        # MIME type names are case-insensitive: 'Text/CSS' is still CSS.
        if mime_type.lower() != 'text/css':
            continue
        media_attr = element.get('media', '').strip() or 'all'
        media = [media_type.strip() for media_type in media_attr.split(',')]
        if not evaluate_media_query(media, device_media_type):
            continue
        if element.tag == 'style':
            # Content is text that is directly in the <style> element, not its
            # descendants
            content = get_child_text(element)
            # ElementTree should give us either unicode or ASCII-only
            # bytestrings, so we don't need `encoding` here.
            css = CSS(
                string=content, base_url=base_url,
                url_fetcher=url_fetcher, media_type=device_media_type,
                font_config=font_config, page_rules=page_rules)
            yield css
        elif element.tag == 'link' and element.get('href'):
            if not element_has_link_type(element, 'stylesheet') or \
                    element_has_link_type(element, 'alternate'):
                continue
            href = get_url_attribute(element, 'href', base_url)
            if href is not None:
                try:
                    yield CSS(
                        url=href, url_fetcher=url_fetcher,
                        _check_mime_type=True, media_type=device_media_type,
                        font_config=font_config, page_rules=page_rules)
                except URLFetchingError as exc:
                    # A broken link must not abort the whole rendering.
                    LOGGER.error(
                        'Failed to load stylesheet at %s : %s', href, exc)
|
2011-04-27 19:50:12 +04:00
|
|
|
|
|
|
|
|
|
|
2017-07-03 16:19:05 +03:00
|
|
|
|
# Keywords used for the legacy ``<font size>`` values 1 to 7.
_LEGACY_FONT_SIZES = {
    1: 'x-small',
    2: 'small',
    3: 'medium',
    4: 'large',
    5: 'x-large',
    6: 'xx-large',
    7: '48px',  # 1.5 * xx-large
}

# ``align`` attribute values that map directly to a ``text-align`` keyword.
_TEXT_ALIGN_VALUES = ('center', 'left', 'right', 'justify')


def _with_px(value):
    """Append ``px`` to ``value`` when it only contains digits."""
    return value + 'px' if value.isdigit() else value


def _attribute_hints(element, templates):
    """Return ``template % value`` for each non-empty attribute, in order.

    ``templates`` is an iterable of ``(attribute_name, template)`` pairs.

    """
    hints = []
    for attribute, template in templates:
        value = element.get(attribute)
        if value:
            hints.append(template % value)
    return hints


def _dimension_hints(element, names):
    """Return ``name:value`` declarations for the given length attributes."""
    hints = []
    for name in names:
        value = element.get(name)
        if value:
            hints.append('%s:%s' % (name, _with_px(value)))
    return hints


def _spacing_hints(element):
    """Return margin declarations for the ``hspace``/``vspace`` attributes."""
    hints = []
    hspace = element.get('hspace')
    if hspace:
        hspace = _with_px(hspace)
        hints.append('margin-left:%s;margin-right:%s' % (hspace, hspace))
    vspace = element.get('vspace')
    if vspace:
        vspace = _with_px(vspace)
        hints.append('margin-top:%s;margin-bottom:%s' % (vspace, vspace))
    return hints


def _text_align_hints(element):
    """Return the ``text-align`` declaration for the ``align`` attribute."""
    align = element.get('align', '').lower()
    if align == 'middle':
        return ['text-align:center']
    if align in _TEXT_ALIGN_VALUES:
        return ['text-align:%s' % align]
    return []


def _legacy_font_size(raw_size):
    """Return the CSS value for a ``<font size>`` attribute, or ``None``.

    A leading ``+`` or ``-`` makes the size relative to the default size 3.
    The result is clamped to the 1-7 range. Invalid values are logged and
    ``None`` is returned.

    """
    size = raw_size.strip()
    relative_plus = size.startswith('+')
    relative_minus = size.startswith('-')
    if relative_plus or relative_minus:
        size = size[1:].strip()
    try:
        size = int(size)
    except ValueError:
        LOGGER.warning('Invalid value for size: %s', size)
        return None
    if relative_plus:
        size += 3
    elif relative_minus:
        # "-1" means one step below the default size 3, i.e. 2.
        size = 3 - size
    return _LEGACY_FONT_SIZES[max(1, min(7, size))]


def _hr_hints(element):
    """Return the declarations for the attributes of an ``<hr>`` element."""
    hints = []
    size = 0
    raw_size = element.get('size')
    if raw_size:
        try:
            size = int(raw_size)
        except ValueError:
            # Log the offending attribute, not the default value.
            LOGGER.warning('Invalid value for size: %s', raw_size)
    if (element.get('color'), element.get('noshade')) != (None, None):
        if size >= 1:
            hints.append('border-width:%spx' % (size / 2))
    elif size == 1:
        hints.append('border-bottom-width:0')
    elif size > 1:
        hints.append('height:%spx' % (size - 2))
    hints.extend(_dimension_hints(element, ('width',)))
    hints.extend(_attribute_hints(element, (('color', 'color:%s'),)))
    return hints


def _presentational_hints(element):
    """Yield ``(target, declarations)`` pairs for legacy HTML attributes.

    ``target`` is the element the declarations apply to. It is ``element``
    itself except for the table ``cellpadding`` attribute, which targets the
    ``td`` and ``th`` descendants of the table. ``declarations`` is a CSS
    declaration list as a string.

    """
    tag = element.tag
    background = (
        ('background', 'background-image:url(%s)'),
        ('bgcolor', 'background-color:%s'))

    if tag == 'body':
        # TODO: we should check the container frame element
        for part, position in (
                ('height', 'top'), ('height', 'bottom'),
                ('width', 'left'), ('width', 'right')):
            for prop in ('margin%s' % part, '%smargin' % position):
                if element.get(prop):
                    yield element, 'margin-%s:%spx' % (
                        position, element.get(prop))
                    break
        for hint in _attribute_hints(
                element, background + (('text', 'color:%s'),)):
            yield element, hint
        # TODO: we should support link, vlink, alink
    elif tag == 'center':
        yield element, 'text-align:center'
    elif tag == 'div':
        for hint in _text_align_hints(element):
            yield element, hint
    elif tag == 'font':
        for hint in _attribute_hints(element, (
                ('color', 'color:%s'), ('face', 'font-family:%s'))):
            yield element, hint
        if element.get('size'):
            font_size = _legacy_font_size(element.get('size'))
            if font_size is not None:
                yield element, 'font-size:%s' % font_size
    elif tag == 'table':
        for hint in _attribute_hints(
                element, (('cellspacing', 'border-spacing:%spx'),)):
            yield element, hint
        if element.get('cellpadding'):
            cellpadding = _with_px(element.get('cellpadding'))
            padding = (
                'padding-left:%s;padding-right:%s;'
                'padding-top:%s;padding-bottom:%s;' % (
                    4 * (cellpadding,)))
            # TODO: don't match subtables cells
            for subelement in element.iter():
                if subelement.tag in ('td', 'th'):
                    yield subelement, padding
        for hint in _spacing_hints(element):
            yield element, hint
        for hint in _dimension_hints(element, ('width', 'height')):
            yield element, hint
        for hint in _attribute_hints(element, background + (
                ('bordercolor', 'border-color:%s'),
                ('border', 'border-width:%spx'))):
            yield element, hint
    elif tag in ('tr', 'td', 'th', 'thead', 'tbody', 'tfoot'):
        # TODO: we should align descendants too
        for hint in _text_align_hints(element):
            yield element, hint
        for hint in _attribute_hints(element, background):
            yield element, hint
        if tag in ('tr', 'td', 'th'):
            for hint in _dimension_hints(element, ('height',)):
                yield element, hint
        if tag in ('td', 'th'):
            for hint in _dimension_hints(element, ('width',)):
                yield element, hint
    elif tag == 'caption':
        # TODO: we should align descendants too
        for hint in _text_align_hints(element):
            yield element, hint
    elif tag == 'col':
        for hint in _dimension_hints(element, ('width',)):
            yield element, hint
    elif tag == 'hr':
        for hint in _hr_hints(element):
            yield element, hint
    elif tag in ('iframe', 'applet', 'embed', 'img', 'input', 'object'):
        # Among <input> elements, only image buttons get these hints.
        if tag != 'input' or element.get('type', '').lower() == 'image':
            align = element.get('align', '').lower()
            if align in ('middle', 'center'):
                # TODO: middle and center values are wrong
                yield element, 'vertical-align:middle'
            for hint in _spacing_hints(element):
                yield element, hint
            # TODO: img seems to be excluded for width and height, but a
            # lot of W3C tests rely on this attribute being applied to img
            for hint in _dimension_hints(element, ('width', 'height')):
                yield element, hint
            if tag in ('img', 'object', 'input'):
                for hint in _attribute_hints(element, (
                        ('border', 'border-width:%spx;border-style:solid'),)):
                    yield element, hint
    elif tag == 'ol':
        # From https://www.w3.org/TR/css-lists-3/
        if element.get('start'):
            yield element, (
                'counter-reset:list-item %s;'
                'counter-increment:list-item -1' % element.get('start'))
    elif tag == 'li':
        # From https://www.w3.org/TR/css-lists-3/
        # The ``value`` attribute belongs to list items, not to lists.
        if element.get('value'):
            yield element, (
                'counter-reset:list-item %s;'
                'counter-increment:none' % element.get('value'))


def find_style_attributes(tree, presentational_hints=False, base_url=None):
    """Yield ``specificity, (element, declaration, base_url)`` rules.

    Every element of ``tree`` (as returned by ``tree.iter()``) is visited in
    document order.

    Rules from "style" attribute are returned with specificity
    ``(1, 0, 0)``.

    If ``presentational_hints`` is ``True``, rules from presentational hints
    (legacy HTML attributes such as ``bgcolor`` or ``align``) are returned
    with specificity ``(0, 0, 0)``, after the "style" attribute rule of the
    same element.

    ``declaration`` is the result of ``tinycss2.parse_declaration_list`` and
    ``base_url`` is passed through unchanged.

    """
    def check_style_attribute(element, style_attribute):
        declarations = tinycss2.parse_declaration_list(style_attribute)
        return element, declarations, base_url

    for element in tree.iter():
        style_attribute = element.get('style')
        if style_attribute:
            yield (1, 0, 0), check_style_attribute(element, style_attribute)
        if not presentational_hints:
            continue
        for target, declarations in _presentational_hints(element):
            yield (0, 0, 0), check_style_attribute(target, declarations)
|
2011-06-29 13:07:48 +04:00
|
|
|
|
|
2011-05-04 13:47:04 +04:00
|
|
|
|
|
2017-08-07 13:21:17 +03:00
|
|
|
|
def matching_page_types(page_type, names=()):
    """Yield every concrete ``PageType`` covered by ``page_type``.

    A field of ``page_type`` left at its "unspecified" value expands to all
    the alternatives:

    - ``side`` set to ``None`` gives ``'left'``, ``'right'`` and ``None``,
    - ``blank`` or ``first`` set to ``False`` gives ``True`` and ``False``,
    - ``name`` set to ``None`` gives each of ``names`` followed by ``None``.

    Any other field value is kept as is, except that ``blank`` and ``first``
    values other than ``False`` are replaced by ``True``.

    """
    if page_type.side is None:
        side_choices = ['left', 'right', None]
    else:
        side_choices = [page_type.side]

    if page_type.blank is False:
        blank_choices = (True, False)
    else:
        blank_choices = (True,)

    if page_type.first is False:
        first_choices = (True, False)
    else:
        first_choices = (True,)

    if page_type.name is None:
        name_choices = tuple(names) + (None,)
    else:
        name_choices = (page_type.name,)

    combinations = [
        (side, blank, first, name)
        for side in side_choices
        for blank in blank_choices
        for first in first_choices
        for name in name_choices]
    for side, blank, first, name in combinations:
        yield PageType(side=side, blank=blank, first=first, name=name)
|
2017-07-17 23:48:21 +03:00
|
|
|
|
|
|
|
|
|
|
2012-08-02 15:04:31 +04:00
|
|
|
|
def evaluate_media_query(query_list, device_media_type):
    """Return the boolean evaluation of ``query_list`` for the given
    ``device_media_type``.

    The result is ``True`` when ``query_list`` contains ``'all'`` or
    ``device_media_type``. Media types are compared case-insensitively, so
    ``'PRINT'`` matches the ``'print'`` device.

    :attr query_list: an iterable of media type strings
    :attr device_media_type: a media type string (for now)

    """
    # TODO: actual support for media queries, not just media types

    def fold(value):
        # Only strings can be lowercased; leave anything else untouched.
        return value.lower() if isinstance(value, str) else value

    accepted = ('all', fold(device_media_type))
    return any(fold(query) in accepted for query in query_list)
|
2011-04-27 19:50:12 +04:00
|
|
|
|
|
|
|
|
|
|
2011-07-21 14:31:08 +04:00
|
|
|
|
def declaration_precedence(origin, importance):
    """Return the precedence for a declaration.

    Precedence values have no meaning unless compared to each other.

    Acceptable values for ``origin`` are the strings ``'author'``, ``'user'``
    and ``'user agent'``. ``importance`` is a truthy value for ``!important``
    declarations. It is ignored for the ``'user agent'`` origin.

    Raise :class:`ValueError` for any other ``origin``.

    """
    # See http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    if origin == 'user agent':
        return 1
    if origin == 'user':
        return 5 if importance else 2
    if origin == 'author':
        return 4 if importance else 3
    # Raise instead of assert: assertions are stripped with ``python -O``.
    raise ValueError('Unknown declaration origin: %r' % (origin,))
|
2011-05-12 18:06:47 +04:00
|
|
|
|
|
2011-05-04 13:47:04 +04:00
|
|
|
|
|
2011-07-21 15:47:42 +04:00
|
|
|
|
def add_declaration(cascaded_styles, prop_name, prop_values, weight, element,
                    pseudo_type=None):
    """Record a property value for an element in ``cascaded_styles``.

    ``cascaded_styles`` maps ``(element, pseudo_type)`` keys to dicts of
    ``prop_name: (prop_values, weight)``. The entry for the element is
    created if needed. The new value replaces the stored one unless the
    stored one has a strictly greater weight.

    """
    declarations = cascaded_styles.setdefault((element, pseudo_type), {})
    if prop_name in declarations:
        _stored_values, stored_weight = declarations[prop_name]
        if stored_weight is not None and not stored_weight <= weight:
            # A heavier declaration is already there: keep it.
            return
    declarations[prop_name] = (prop_values, weight)
|
2011-07-20 20:23:54 +04:00
|
|
|
|
|
|
|
|
|
|
2016-02-12 01:02:53 +03:00
|
|
|
|
def set_computed_styles(cascaded_styles, computed_styles, element, parent,
                        root=None, pseudo_type=None, base_url=None,
                        target_collector=None):
    """Compute and store the style of ``element`` (or its pseudo-element).

    The cascaded declarations found in ``cascaded_styles`` for
    ``(element, pseudo_type)`` are turned into computed values by
    :func:`computed_from_cascaded`, and the result is stored in
    ``computed_styles`` under the same key.

    The root element itself must be given with ``parent`` set to ``None``.
    In every other case ``parent`` is required, and the computed styles of
    ``parent`` and ``root`` must already be in ``computed_styles``.

    """
    is_root_element = element == root and pseudo_type is None
    if is_root_element:
        assert parent is None
        parent_style = None
        # When specified on the font-size property of the root element, the
        # rem units refer to the property’s initial value.
        root_style = {'font_size': properties.INITIAL_VALUES['font_size']}
    else:
        assert parent is not None
        parent_style = computed_styles[parent, None]
        root_style = computed_styles[root, None]

    key = (element, pseudo_type)
    declarations = cascaded_styles.get(key, {})
    computed_styles[key] = computed_from_cascaded(
        element, declarations, parent_style, pseudo_type=pseudo_type,
        root_style=root_style, base_url=base_url,
        target_collector=target_collector)
|
2011-05-12 18:06:47 +04:00
|
|
|
|
|
|
|
|
|
|
2016-02-12 01:02:53 +03:00
|
|
|
|
def computed_from_cascaded(element, cascaded, parent_style, pseudo_type=None,
                           root_style=None, base_url=None,
                           target_collector=None):
    """Build the computed style dict from cascaded and parent styles.

    ``cascaded`` maps property names to ``(value, precedence)`` pairs.
    ``parent_style`` is the computed style of the parent, or ``None`` for
    the root element.

    Properties missing from ``cascaded`` are inherited from the parent when
    listed in ``properties.INHERITED`` and take their initial value
    otherwise. The remaining work is delegated to
    ``computed_values.compute``.

    """
    has_parent = parent_style is not None

    if has_parent and not cascaded:
        # Fast path for anonymous boxes:
        # no cascaded style, only implicitly initial or inherited values.
        computed = dict(properties.INITIAL_VALUES)
        computed.update(
            (name, parent_style[name]) for name in properties.INHERITED)
        # page is not inherited but taken from the ancestor if 'auto'
        computed['page'] = parent_style['page']
        # border-*-style is none, so border-width computes to zero.
        # Other than that, properties that would need computing are
        # border-*-color, but they do not apply.
        computed.update(
            ('border_%s_width' % side, 0)
            for side in ('top', 'bottom', 'left', 'right'))
        computed['outline_width'] = 0
        return computed

    # Handle inheritance and initial values
    specified = {}
    computed = {}
    for name, initial in properties.INITIAL_VALUES.items():
        if name in cascaded:
            value, _precedence = cascaded[name]
            keyword = value
        elif name in properties.INHERITED:
            keyword = 'inherit'
        else:
            keyword = 'initial'

        if keyword == 'inherit' and not has_parent:
            # On the root element, 'inherit' from initial values
            keyword = 'initial'

        if keyword == 'initial':
            value = initial
            if name not in INITIAL_NOT_COMPUTED:
                # The value is the same as when computed
                computed[name] = initial
        elif keyword == 'inherit':
            # Values in parent_style are already computed.
            value = parent_style[name]
            computed[name] = value

        specified[name] = value

    if specified['page'] == 'auto':
        # The page property does not inherit. However, if the page value on
        # an element is auto, then its used value is the value specified on
        # its nearest ancestor with a non-auto value. When specified on the
        # root element, the used value for auto is the empty string.
        page = parent_style['page'] if has_parent else ''
        computed['page'] = specified['page'] = page

    return computed_values.compute(
        element, pseudo_type, specified, computed, parent_style, root_style,
        base_url, target_collector)
|
2011-06-29 13:07:48 +04:00
|
|
|
|
|
2011-05-12 18:06:47 +04:00
|
|
|
|
|
2018-01-29 00:17:26 +03:00
|
|
|
|
def parse_page_selectors(rule):
    """Parse the selector list of an ``@page`` rule.

    Only ``rule.prelude`` is read.

    Return a list holding one dict per comma-separated page selector if the
    prelude is correctly parsed. Each dict contains:

    - 'side' ('left', 'right' or None),
    - 'blank' (True or False),
    - 'first' (True or False),
    - 'name' (page name string or None), and
    - 'specificity' (list of three numbers: 1 or 0 for the page name, the
      number of ':blank' / ':first' pseudo-classes, and the number of
      ':left' / ':right' pseudo-classes).

    An empty prelude gives a list with a single dict of default values.

    Return ``None`` if something went wrong while parsing the rule.

    """
    # See https://drafts.csswg.org/css-page-3/#syntax-page-selector

    tokens = list(remove_whitespace(rule.prelude))
    page_data = []

    # TODO: Specificity is probably wrong, should clean and test that.
    if not tokens:
        page_data.append({
            'side': None, 'blank': False, 'first': False, 'name': None,
            'specificity': [0, 0, 0]})
        return page_data

    # Each iteration of the outer loop consumes one comma-separated selector.
    while tokens:
        types = {
            'side': None, 'blank': False, 'first': False, 'name': None,
            'specificity': [0, 0, 0]}

        # Optional leading page name.
        if tokens[0].type == 'ident':
            token = tokens.pop(0)
            types['name'] = token.value
            types['specificity'][0] = 1

        if len(tokens) == 1:
            # A single remaining token can neither be a pseudo-class (':'
            # followed by an ident) nor a comma followed by another selector.
            return None
        elif not tokens:
            page_data.append(types)
            return page_data

        while tokens:
            literal = tokens.pop(0)
            if literal.type != 'literal':
                return None

            if literal.value == ':':
                if not tokens or tokens[0].type != 'ident':
                    return None
                ident = tokens.pop(0)
                pseudo_class = ident.lower_value
                if pseudo_class in ('left', 'right'):
                    if types['side']:
                        # A side was already given for this selector.
                        return None
                    types['side'] = pseudo_class
                    types['specificity'][2] += 1
                elif pseudo_class in ('blank', 'first'):
                    if types[pseudo_class]:
                        # The same pseudo-class was given twice.
                        return None
                    types[pseudo_class] = True
                    types['specificity'][1] += 1
                else:
                    return None
            elif literal.value == ',':
                # A comma must follow a non-empty selector and must be
                # followed by another selector.
                if tokens and any(types['specificity']):
                    break
                else:
                    return None
            else:
                # Any other literal is not part of the page selector syntax;
                # reject the rule instead of silently skipping the token.
                return None

        page_data.append(types)

    return page_data
|
|
|
|
|
|
|
|
|
|
|
2016-09-24 16:36:26 +03:00
|
|
|
|
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config, ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walk ``stylesheet_rules`` and dispatch on the rule type:

    - qualified rules: their selectors are compiled and registered in
      ``matcher`` together with their preprocessed declarations;
    - ``@import``: the imported stylesheet is loaded through ``CSS`` with
      the same ``matcher`` and ``page_rules``;
    - ``@media``: the nested rules are preprocessed recursively when the
      media query matches ``device_media_type``;
    - ``@page``: ``(rule, selector_list, declarations)`` tuples are appended
      to ``page_rules`` for the page itself and for its margin at-rules;
    - ``@font-face``: the descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not ``None``)
      and truthy return values are appended to ``fonts``.

    ``base_url`` is used to preprocess declarations and descriptors and to
    resolve ``@import`` URLs. ``url_fetcher`` is forwarded to ``CSS`` and
    ``add_font_face``.

    When ``ignore_imports`` is true, ``@import`` rules are skipped with a
    warning. The flag is switched on as soon as another kind of rule has
    been accepted.

    Nothing is returned: ``matcher``, ``page_rules`` and ``fonts`` are
    modified in place.

    """
    for rule in stylesheet_rules:
        # Rules without content are only meaningful for @import.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(preprocess_declarations(
                base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                logger_level = WARNING
                try:
                    selectors = list(
                        cssselect2.compile_selector_list(rule.prelude))
                    # Check every selector before registering any of them,
                    # so that a rule reported as ignored leaves nothing
                    # behind in the matcher.
                    for selector in selectors:
                        pseudo_element = selector.pseudo_element
                        if pseudo_element in PSEUDO_ELEMENTS:
                            continue
                        if pseudo_element.startswith('-'):
                            # Prefixed pseudo-elements are only reported at
                            # debug level.
                            logger_level = DEBUG
                            raise cssselect2.SelectorError(
                                'ignored prefixed pseudo-element: %s'
                                % pseudo_element)
                        raise cssselect2.SelectorError(
                            'unknown pseudo-element: %s' % pseudo_element)
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                except cssselect2.SelectorError as exc:
                    LOGGER.log(
                        logger_level,
                        "Invalid or unsupported selector '%s', %s",
                        tinycss2.serialize(rule.prelude), exc)
                    continue
            ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                LOGGER.warning('@import rule "%s" not at the beginning of the '
                               'stylesheet, '
                               'the whole rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue

            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Everything after the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @import rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(
                base_url, url, allow_relative=False,
                context='@import at %s:%s',
                context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    # The returned object is not kept: the shared matcher
                    # and page_rules are passed in to receive the rules.
                    CSS(
                        url=url, url_fetcher=url_fetcher,
                        media_type=device_media_type, font_config=font_config,
                        matcher=matcher, page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error(
                        'Failed to load stylesheet at %s : %s', url, exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @media rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            ignore_imports = True
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            # Imports are never accepted inside @media.
            preprocess_stylesheet(
                device_media_type, base_url, content_rules, url_fetcher,
                matcher, page_rules, fonts, font_config, ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                # Use a double lambda to have a closure that holds page_types
                match = (lambda page_type: lambda page_names: list(
                    matching_page_types(page_type, names=page_names)))(
                        page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, match)]
                    page_rules.append((rule, selector_list, declarations))

                # Nested at-rules are registered under '@' + their keyword.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (
                            margin_rule.content is None):
                        continue
                    declarations = list(preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                    if declarations:
                        selector_list = [(
                            specificity, '@' + margin_rule.lower_at_keyword,
                            match)]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Both mandatory descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
|
2012-03-25 03:39:41 +04:00
|
|
|
|
|
|
|
|
|
|
2017-03-26 12:42:50 +03:00
|
|
|
|
def parse_media_query(tokens):
    """Parse a comma-separated list of media types.

    Return ``['all']`` when nothing is left of ``tokens`` after
    ``remove_whitespace``, otherwise the list of lower-cased media type
    names, one per comma-separated part.

    Return ``None``, after logging a warning, as soon as a part is not made
    of exactly one ident token.

    """
    tokens = remove_whitespace(tokens)
    if not tokens:
        return ['all']

    media_types = []
    for part in split_on_comma(tokens):
        token_types = [token.type for token in part]
        if token_types != ['ident']:
            LOGGER.warning(
                'Expected a media type, got %s', tinycss2.serialize(part))
            return None
        media_types.append(part[0].lower_value)
    return media_types
|
|
|
|
|
|
|
|
|
|
|
2016-08-30 19:15:30 +03:00
|
|
|
|
def get_all_computed_styles(html, user_stylesheets=None,
                            presentational_hints=False, font_config=None,
                            page_rules=None, target_collector=None):
    """Compute all the computed styles of all elements in ``html`` document.

    Do everything from finding author stylesheets to parsing and applying them.

    Return a ``(style_for, cascaded_styles, computed_styles)`` tuple, where
    ``style_for`` is a function that takes an element and an optional
    pseudo-element type, and returns a style dict object (or ``None`` when
    there is no computed style for that key).

    """
    # Stylesheets as (sheet, origin, forced specificity) tuples. Order here
    # is not important ('origin' is).
    sheets = [
        (sheet, 'user agent', None)
        for sheet in (html._ua_stylesheets() or [])]
    if presentational_hints:
        sheets.extend(
            (sheet, 'author', (0, 0, 0))
            for sheet in (html._ph_stylesheets() or []))
    author_sheets = find_stylesheets(
        html.wrapper_element, html.media_type, html.url_fetcher,
        html.base_url, font_config, page_rules)
    sheets.extend((sheet, 'author', None) for sheet in author_sheets)
    sheets.extend(
        (sheet, 'user', None) for sheet in (user_stylesheets or []))

    # cascaded_styles layout:
    # keys: (element, pseudo_element_type)
    #    element: an ElementTree Element or the '@page' string for @page styles
    #    pseudo_element_type: a string such as 'first' (for @page) or 'after',
    #        or None for normal elements
    # values: dicts of
    #     keys: property name as a string
    #     values: (values, weight)
    #         values: a PropertyValue-like object
    #         weight: values with a greater weight take precedence, see
    #             http://www.w3.org/TR/CSS21/cascade.html#cascading-order
    cascaded_styles = {}

    PROGRESS_LOGGER.info('Step 3 - Applying CSS')

    # Declarations coming from element attributes, always of 'author' origin.
    style_attributes = find_style_attributes(
        html.etree_element, presentational_hints, html.base_url)
    for specificity, (element, declarations, base_url) in style_attributes:
        for name, values, importance in preprocess_declarations(
                base_url, declarations):
            weight = (
                declaration_precedence('author', importance), specificity)
            add_declaration(cascaded_styles, name, values, weight, element)

    # computed_styles layout:
    # keys: (element, pseudo_element_type), like cascaded_styles
    # values: style dict objects:
    #     keys: property name as a string
    #     values: a PropertyValue-like object
    computed_styles = {}

    # First pass: "real" elements, *in tree order*, so that parents get their
    # computed styles before their children and inheritance works.
    # Every element is visited, even when nothing was cascaded for it.
    for wrapper in html.wrapper_element.iter_subtree():
        etree_element = wrapper.etree_element
        for sheet, origin, sheet_specificity in sheets:
            # Declarations of the rules whose selector matches this element.
            for matched in sheet.matcher.match(wrapper):
                specificity, order, pseudo_type, declarations = matched
                specificity = sheet_specificity or specificity
                for name, values, importance in declarations:
                    weight = (
                        declaration_precedence(origin, importance),
                        specificity)
                    add_declaration(
                        cascaded_styles, name, values, weight,
                        etree_element, pseudo_type)
        parent_wrapper = wrapper.parent
        set_computed_styles(
            cascaded_styles, computed_styles, etree_element,
            root=html.etree_element,
            parent=(parent_wrapper.etree_element if parent_wrapper else None),
            base_url=html.base_url, target_collector=target_collector)

    # Page names used by at least one computed style.
    page_names = {style['page'] for style in computed_styles.values()}

    # Declarations of @page rules, keyed by the page types they match.
    for sheet, origin, sheet_specificity in sheets:
        for _rule, selector_list, declarations in sheet.page_rules:
            for specificity, pseudo_type, match in selector_list:
                specificity = sheet_specificity or specificity
                for page_type in match(page_names):
                    for name, values, importance in declarations:
                        weight = (
                            declaration_precedence(origin, importance),
                            specificity)
                        add_declaration(
                            cascaded_styles, name, values, weight, page_type,
                            pseudo_type)

    # Second pass: pseudo-elements, in any order. They inherit from their
    # associated element, which is why they come last. A separate pass is
    # used because the (element, pseudo_type) keys of cascaded_styles give no
    # easy way to list the pseudo-elements of a given element.
    #
    # Pseudo-elements without cascaded styles are not visited. (They might
    # as well not exist.)
    for element, pseudo_type in cascaded_styles:
        if not pseudo_type or isinstance(element, PageType):
            continue
        set_computed_styles(
            cascaded_styles, computed_styles, element,
            pseudo_type=pseudo_type,
            # The pseudo-element inherits from the element.
            root=html.etree_element, parent=element,
            base_url=html.base_url, target_collector=target_collector)

    box_sides = ('top', 'bottom', 'left', 'right')

    # This is mostly useful to make pseudo_type optional.
    def style_for(element, pseudo_type=None, __get=computed_styles.get):
        """Convenience function to get the computed styles for an element."""
        style = __get((element, pseudo_type))
        if not style:
            return style

        display = style['display']
        if 'table' not in display:
            return style

        is_table = display in ('table', 'inline-table')
        if is_table and style['border_collapse'] == 'collapse':
            # Padding do not apply
            for side in box_sides:
                style['padding_' + side] = computed_values.ZERO_PIXELS
        if display.startswith('table-') and display != 'table-caption':
            # Margins do not apply
            for side in box_sides:
                style['margin_' + side] = computed_values.ZERO_PIXELS

        return style

    return style_for, cascaded_styles, computed_styles
|