1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-04 16:07:57 +03:00

Embed JPEG-encoded images in PDF. Fix #6

If an image is in JPEG format, embed it as-is in the PDF output.
This often results in smaller PDF file size.

(The image is still decoded, however,
so there is no rendering speed improvement.)
This commit is contained in:
Simon Sapin 2012-12-20 20:10:48 +01:00
parent 3a43ed6a8b
commit f243dbcca3
5 changed files with 50 additions and 20 deletions

View File

@ -97,6 +97,7 @@ class _TaggedTuple(tuple):
"""
def _get_metadata(box, bookmarks, links, anchors, matrix):
bookmark_label = box.bookmark_label
bookmark_level = box.bookmark_level
@ -128,6 +129,7 @@ def _get_metadata(box, bookmarks, links, anchors, matrix):
if has_anchor:
anchors[anchor_name] = pos_x, pos_y
def _prepare(box, bookmarks, links, anchors, matrix):
transform = _get_matrix(box)
if transform:

View File

@ -27,7 +27,7 @@ from .text import USING_INTROSPECTION
# Do not try to import PyGObject 3 if we already have PyGTK
# that tends to segfault.
if not USING_INTROSPECTION:
# Use PyGObject introspection
# Use PyGTK
try:
from gtk import gdk
from gtk.gdk import PixbufLoader
@ -38,7 +38,7 @@ if not USING_INTROSPECTION:
else:
def gdkpixbuf_loader(file_obj, string):
"""Load raster images with gdk-pixbuf through PyGTK."""
pixbuf = get_pixbuf(file_obj, string)
pixbuf, jpeg_data = get_pixbuf(file_obj, string)
dummy_context = cairo.Context(cairo.ImageSurface(
cairo.FORMAT_ARGB32, 1, 1))
gdk.CairoContext(dummy_context).set_source_pixbuf(pixbuf, 0, 0)
@ -48,7 +48,14 @@ if not USING_INTROSPECTION:
# It is therefore not thread-safe and state must be reset
# before any use.
get_pattern = dummy_context.get_source
if cairo.version_info >= (1, 10, 0):
add_jpeg_data(get_pattern().get_surface(), jpeg_data)
return get_pattern, pixbuf.get_width(), pixbuf.get_height()
def pixbuf_format(loader):
    """Return the name of the image format reported by *loader*.

    PyGTK flavour: the value returned by ``loader.get_format()`` is
    indexed with the ``'name'`` key.

    :param loader: an object with a ``get_format()`` method.
    :returns: the format name, or ``None`` when ``get_format()`` returns
        a falsy value (no format reported).
    """
    format_ = loader.get_format()
    if format_:
        return format_['name']
    # No format reported: make the "unknown" result explicit.
    return None
# Use PyGObject introspection
try:
@ -58,6 +65,11 @@ else:
def gdkpixbuf_loader(file_obj, string, pixbuf_error=exception):
raise pixbuf_error
else:
def pixbuf_format(loader):
    """Return the name of the image format reported by *loader*.

    Introspection flavour: the value returned by ``loader.get_format()``
    is queried through its ``get_name()`` method.

    :param loader: an object with a ``get_format()`` method.
    :returns: the format name, or ``None`` when ``get_format()`` returns
        a falsy value (no format reported).
    """
    format_ = loader.get_format()
    if format_:
        return format_.get_name()
    # No format reported: make the "unknown" result explicit.
    return None
PIXBUF_VERSION = (GdkPixbuf.PIXBUF_MAJOR,
GdkPixbuf.PIXBUF_MINOR,
GdkPixbuf.PIXBUF_MICRO)
@ -75,7 +87,7 @@ else:
and Gdk.
"""
pixbuf = get_pixbuf(file_obj, string)
pixbuf, jpeg_data = get_pixbuf(file_obj, string)
dummy_context = cairo.Context(cairo.ImageSurface(
cairo.FORMAT_ARGB32, 1, 1))
Gdk.cairo_set_source_pixbuf(dummy_context, pixbuf, 0, 0)
@ -85,6 +97,8 @@ else:
# It is therefore not thread-safe and state must be reset
# before any use.
get_pattern = dummy_context.get_source
if cairo.version_info >= (1, 10, 0):
add_jpeg_data(get_pattern().get_surface(), jpeg_data)
return get_pattern, pixbuf.get_width(), pixbuf.get_height()
except ImportError:
@ -94,38 +108,40 @@ else:
without Gdk and going through PNG.
"""
pixbuf = get_pixbuf(file_obj, string)
pixbuf, jpeg_data = get_pixbuf(file_obj, string)
_, png = pixbuf.save_to_bufferv('png', ['compression'], ['0'])
return cairo_png_loader(None, png)
return cairo_png_loader(None, png, jpeg_data)
def get_pixbuf(file_obj=None, string=None, chunck_size=16 * 1024):
    """Create a Pixbuf object and keep the raw bytes of JPEG images.

    :param file_obj: optional file-like object; when given, its whole
        content is read and used instead of *string*.
    :param string: optional byte string holding the encoded image.
    :param chunck_size: unused; kept so that existing callers passing it
        keep working (the content is now read in one go, because the
        complete byte string is needed to return the JPEG data).
    :returns: a ``(pixbuf, jpeg_data)`` pair. ``jpeg_data`` is the
        encoded input when ``pixbuf_format`` reports ``'jpeg'`` for the
        loader, ``None`` otherwise.
    :raises ValueError: if there is no content to load.
    """
    if file_obj:
        string = file_obj.read()
    if not string:
        raise ValueError('Could not load image: empty content')
    loader = PixbufLoader()
    try:
        loader.write(string)
    finally:
        # Pixbuf is really unhappy if we don't do this:
        loader.close()
    jpeg_data = string if pixbuf_format(loader) == 'jpeg' else None
    return loader.get_pixbuf(), jpeg_data
def cairo_png_loader(file_obj, string, jpeg_data=None):
    """Load a PNG byte stream into a cairo ImageSurface.

    :param file_obj: file-like object with the PNG data; when falsy,
        *string* is used instead.
    :param string: PNG data as a byte string, wrapped in a ``BytesIO``
        when *file_obj* is falsy.
    :param jpeg_data: optional JPEG bytes handed to ``add_jpeg_data``
        together with the decoded surface.
    :returns: a ``(get_pattern, width, height)`` triple, where
        ``get_pattern()`` builds a new ``cairo.SurfacePattern`` on the
        surface at each call.
    """
    surface = cairo.ImageSurface.create_from_png(file_obj or BytesIO(string))
    add_jpeg_data(surface, jpeg_data)

    def get_pattern():
        return cairo.SurfacePattern(surface)

    return get_pattern, surface.get_width(), surface.get_height()
def add_jpeg_data(surface, jpeg_data):
    """Attach *jpeg_data* to *surface* under the ``image/jpeg`` MIME type.

    Nothing happens when *jpeg_data* is empty or ``None``, or when
    *surface* has no ``set_mime_data`` method.

    :param surface: object expected to provide ``set_mime_data``.
    :param jpeg_data: the JPEG-encoded bytes, or ``None``.
    """
    if jpeg_data and hasattr(surface, 'set_mime_data'):
        surface.set_mime_data('image/jpeg', jpeg_data)
def cairosvg_loader(file_obj, string, uri):
"""Return a cairo Surface from a SVG byte stream.

View File

@ -151,7 +151,7 @@ def document_to_pixels(document, name, expected_width, expected_height):
def png_to_pixels(png_bytes, width, height):
pixbuf = get_pixbuf(string=png_bytes)
pixbuf, _ = get_pixbuf(string=png_bytes)
assert (pixbuf.get_width(), pixbuf.get_height()) == (width, height)
if not pixbuf.get_has_alpha():
pixbuf = pixbuf.add_alpha(False, 0, 0, 0) # no substitute color

View File

@ -15,6 +15,7 @@ from __future__ import division, unicode_literals
import io
import cairo
import pytest
from .. import CSS
from .. import pdf
@ -203,7 +204,6 @@ def test_bookmarks():
label='a', target=(0, 75, 1425))]
@assert_no_logs
def test_links():
links = get_links('<body>')
@ -294,3 +294,15 @@ def test_missing_links():
assert links == [[('internal', (0, 50, 935), (50, 950, 450, 935))]]
assert len(logs) == 1
assert 'WARNING: No anchor #missing for internal URI reference' in logs[0]
@assert_no_logs
def test_jpeg():
    """Check that JPEG images, and only those, end up DCT-encoded in PDF."""
    if not hasattr(cairo.ImageSurface, 'set_mime_data'):
        # Without set_mime_data the JPEG bytes cannot be attached to the
        # surface, so the assertions below cannot hold.
        pytest.xfail('cairo.ImageSurface has no set_mime_data')

    def render(html):
        return TestHTML(base_url=resource_filename('dummy.html'),
                        string=html).write_pdf()

    assert b'/Filter /DCTDecode' not in render('<img src="pattern.gif">')
    # JPEG-encoded image, embedded in PDF:
    assert b'/Filter /DCTDecode' in render('<img src="blue.jpg">')

View File

@ -84,7 +84,7 @@ def make_test_suite():
HTML(BASE_URL + name).write_png(
png_filename, stylesheets=[PAGE_SIZE_STYLESHEET])
with open(png_filename, 'rb') as image:
# get_pixbuf() returns a (pixbuf, jpeg_data) pair: unpack it first,
# then read the pixels from the pixbuf itself.
pixbuf, _ = get_pixbuf(file_obj=image)
raw = pixbuf.get_pixels()
rendered[name] = raw
return raw