mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 00:21:15 +03:00
2011-12-02 15:34:47 +01:00

233 lines
7.8 KiB

# coding: utf8
# WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
# Copyright (C) 2011 Simon Sapin
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Output document classes for various formats."""
import os.path
import math
from cssutils import parseFile
import lxml.html
import cairo
from .css import get_all_computed_styles, WeasyCSSParser
from .css.computed_values import LENGTHS_TO_PIXELS
from .formatting_structure.build import build_formatting_structure
from .layout import layout
from . import draw
from . import utils
# This is a one-element tuple.
# NOTE(review): the comment above describes a one-element tuple, but the
# statement below is a bare expression: it builds the path to the bundled
# 'css/html4_ua.css' file next to this module and discards the result.
# The assignment target and the tuple wrapper appear to be missing from
# this copy of the file (`parseFile` is imported above but never used in
# the visible code, so the path was presumably parsed into a stylesheet).
# Confirm against upstream before relying on this line.
os.path.join(os.path.dirname(__file__), 'css', 'html4_ua.css')
class Document(object):
    """Abstract output document.

    Wraps an lxml HTML tree and lazily derives, in order, the computed
    styles, the formatting structure (box tree) and the laid-out pages.
    Each stage is computed on first access and cached on the instance.
    """

    def __init__(self, dom, user_stylesheets=None,
                 user_agent_stylesheets=None):
        """Create a document from a parsed HTML tree.

        :param dom: the root ``<html>`` element (lxml ``HtmlElement``).
        :param user_stylesheets: optional list of user stylesheets;
            a false value is stored as an empty list.
        :param user_agent_stylesheets: optional list of user-agent
            stylesheets; a false value is stored as an empty list.

        NOTE(review): the end of this signature was missing from the copy
        of the file this was restored from. ``None`` is used as the default
        for ``user_agent_stylesheets`` because the body only ever evaluates
        ``user_agent_stylesheets or []``; upstream may default to a bundled
        HTML4 stylesheet instead -- confirm.
        """
        assert getattr(dom, 'tag', None) == 'html', (
            'HTML document expected, got %r.' % (dom,))

        #: lxml HtmlElement object
        self.dom = dom

        # Go through the property setter which calls ensure_url()
        self.base_url = self.base_url

        self.css_parser = WeasyCSSParser(parseComments=False)

        self.user_stylesheets = user_stylesheets or []
        self.user_agent_stylesheets = user_agent_stylesheets or []

        # Lazily-filled caches, see the properties of the same name.
        self._computed_styles = None
        self._formatting_structure = None
        self._pages = None

        # Maps an image URI to its surface, or to None if loading failed.
        self._image_cache = {}

    @property
    def base_url(self):
        """The URL of the document, used for relative URLs it contains.

        If set to something that does not look like a URL, the value is
        assumed to be a filename and is converted to a file:// URL.
        If that filename is relative, it is interpreted from the current
        working directory.
        """
        return self.dom.getroottree().docinfo.URL

    @base_url.setter
    def base_url(self, value):
        # Empty / None values are stored unchanged; anything else is
        # normalized by utils.ensure_url() first.
        if value:
            value = utils.ensure_url(value)
        self.dom.getroottree().docinfo.URL = value

    @classmethod
    def from_string(cls, source, encoding=None, **kwargs):
        """Make a document from an HTML string."""
        parser = lxml.html.HTMLParser(encoding=encoding)
        dom = lxml.html.document_fromstring(source, parser=parser)
        return cls(dom, **kwargs)

    @classmethod
    def from_file(cls, file_or_filename_or_url, encoding=None, **kwargs):
        """Make a document from a filename or open file object."""
        parser = lxml.html.HTMLParser(encoding=encoding)
        dom = lxml.html.parse(file_or_filename_or_url, parser=parser).getroot()
        return cls(dom, **kwargs)

    def style_for(self, element, pseudo_type=None):
        """Convenience method to get the computed styles for an element."""
        return self.computed_styles[(element, pseudo_type)]

    @property
    def computed_styles(self):
        """dict of (element, pseudo_element_type) -> StyleDict

        StyleDict: a dict of property_name -> PropertyValue,
        also with attribute access
        """
        if self._computed_styles is None:
            # NOTE(review): the argument list of this call was missing from
            # the copy of the file this was restored from. It is rebuilt
            # from the stylesheet lists stored in __init__; verify the
            # keyword names and the medium against the signature of
            # .css.get_all_computed_styles.
            self._computed_styles = get_all_computed_styles(
                self,
                user_stylesheets=self.user_stylesheets,
                ua_stylesheets=self.user_agent_stylesheets,
                medium='print')
        return self._computed_styles

    @property
    def formatting_structure(self):
        """The Box object for the root element.

        Represents the tree of all boxes.
        """
        if self._formatting_structure is None:
            self._formatting_structure = build_formatting_structure(self)
        return self._formatting_structure

    @property
    def pages(self):
        """List of laid-out pages with an absolute size and position
        for every box.
        """
        if self._pages is None:
            # "Linearize" code flow: force the earlier stages to be computed
            # here rather than from somewhere deep inside layout().
            _ = self.computed_styles
            _ = self.formatting_structure
            # Actual work
            self._pages = layout(self)
        return self._pages

    def get_image_surface_from_uri(self, uri):
        """Return the image surface for ``uri``, or None if it can't be loaded.

        Results, including failures, are cached per URI so each image is
        fetched and decoded at most once per document.
        """
        if uri in self._image_cache:
            return self._image_cache[uri]
        try:
            surface = draw.get_image_surface_from_uri(uri)
        # TODO: have a more specific list of exception for network errors
        # and PNG parsing errors.
        except Exception:
            # Deliberate best effort: a broken image must not abort rendering.
            surface = None
        self._image_cache[uri] = surface
        return surface
class PNGDocument(Document):
    """PNG output document."""

    def __init__(self, dom, *args, **kwargs):
        """Create the document; arguments are forwarded to ``Document``."""
        super(PNGDocument, self).__init__(dom, *args, **kwargs)
        # 1x1 placeholder surface. Pages are not drawn on it: draw_page()
        # allocates a correctly sized surface for each page.
        self.surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 1, 1)

    def draw_page(self, page):
        """Draw a single page on a new ImageSurface.

        :returns: a ``(width, height, surface)`` tuple, where the size is
            the page's outer size rounded up to whole pixels.
        """
        width = int(math.ceil(page.outer_width))
        height = int(math.ceil(page.outer_height))
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
        context = draw.CairoContext(surface)
        draw.draw_page(self, page, context)
        return width, height, surface

    def write_page_to(self, page_index, target):
        """Write a single page as PNG into a file-like or filename `target`."""
        _width, _height, surface = self.draw_page(self.pages[page_index])
        # NOTE(review): this write was missing from the copy of the file
        # this was restored from; without it nothing reaches `target`.
        surface.write_to_png(target)

    def write_to(self, target):
        """Write all pages as PNG into a file-like or filename `target`.

        Pages are laid out vertically each above the next and centered
        horizontally.
        """
        pages = [self.draw_page(page) for page in self.pages]
        total_height = sum(height for width, height, surface in pages)
        max_width = max(width for width, height, surface in pages)
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                     max_width, total_height)
        context = draw.CairoContext(surface)
        position_y = 0
        for width, height, page_surface in pages:
            # Center pages narrower than the widest one.
            position_x = (max_width - width) // 2
            context.set_source_surface(page_surface, position_x, position_y)
            # set_source_surface() only selects the source; paint() is what
            # actually copies the page onto the combined surface.
            context.paint()
            position_y += height
        surface.write_to_png(target)
class PDFDocument(Document):
    """PDF output document."""

    def __init__(self, dom, *args, **kwargs):
        """Create the document; arguments are forwarded to ``Document``."""
        super(PDFDocument, self).__init__(dom, *args, **kwargs)
        # Use a dummy page size initially
        self.surface = cairo.PDFSurface(None, 1, 1)

    def write_to(self, target):
        """Write the whole document as PDF into a file-like or filename
        `target`.
        """
        # The actual page size is set for each page.
        surface = cairo.PDFSurface(target, 1, 1)

        # Layout works in CSS pixels while PDF surfaces are sized in points.
        # The float literal keeps the ratio exact even if the table value is
        # an integer under Python 2 division rules.
        px_to_pt = 1.0 / LENGTHS_TO_PIXELS['pt']
        for page in self.pages:
            # Actual page size is here. May be different between pages.
            surface.set_size(
                page.outer_width * px_to_pt,
                page.outer_height * px_to_pt)
            context = draw.CairoContext(surface)
            context.scale(px_to_pt, px_to_pt)
            draw.draw_page(self, page, context)
            # Emit this page so the next iteration starts on a fresh one.
            surface.show_page()

        # Flush everything to `target` and close the PDF.
        surface.finish()