mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-09-11 20:47:56 +03:00
Add --cache-folder option to temporarily store images on disk
This commit is contained in:
parent
07e43dc4c2
commit
44001c5383
@ -138,7 +138,11 @@ class HTML:
|
||||
:param font_config: A font configuration handling ``@font-face`` rules.
|
||||
:type counter_style: :class:`css.counters.CounterStyle`
|
||||
:param counter_style: A dictionary storing ``@counter-style`` rules.
|
||||
:param dict image_cache: A dictionary used to cache images.
|
||||
:param image_cache:
|
||||
A dictionary used to cache images, or a folder path where images
|
||||
are temporarily stored.
|
||||
:type image_cache:
|
||||
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
|
||||
:param bool forms: Whether PDF forms have to be included.
|
||||
:returns: A :class:`document.Document` object.
|
||||
|
||||
@ -186,7 +190,11 @@ class HTML:
|
||||
:param font_config: A font configuration handling ``@font-face`` rules.
|
||||
:type counter_style: :class:`css.counters.CounterStyle`
|
||||
:param counter_style: A dictionary storing ``@counter-style`` rules.
|
||||
:param dict image_cache: A dictionary used to cache images.
|
||||
:param image_cache:
|
||||
A dictionary used to cache images, or a folder path where images
|
||||
are temporarily stored.
|
||||
:type image_cache:
|
||||
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
|
||||
:param bytes identifier: A bytestring used as PDF file identifier.
|
||||
:param str variant: A PDF variant name.
|
||||
:param str version: A PDF version number.
|
||||
|
@ -94,6 +94,11 @@ def main(argv=None, stdout=None, stdin=None):
|
||||
multiple times, ``all`` adds all allowed values, ``none`` removes all
|
||||
previously set values.
|
||||
|
||||
.. option:: -c <folder>, --cache-folder <folder>
|
||||
|
||||
Store cache on disk instead of memory. The ``folder`` is created if
|
||||
needed and cleaned after the PDF is generated.
|
||||
|
||||
.. option:: -v, --verbose
|
||||
|
||||
Show warnings and information messages.
|
||||
@ -156,6 +161,10 @@ def main(argv=None, stdout=None, stdin=None):
|
||||
'-O', '--optimize-size', action='append',
|
||||
help='optimize output size for specified features',
|
||||
choices=('images', 'fonts', 'all', 'none'), default=['fonts'])
|
||||
parser.add_argument(
|
||||
'-c', '--cache-folder',
|
||||
help='Store cache on disk instead of memory. The ``folder`` is '
|
||||
'created if needed and cleaned after the PDF is generated.')
|
||||
parser.add_argument(
|
||||
'-v', '--verbose', action='store_true',
|
||||
help='show warnings and information messages')
|
||||
@ -203,6 +212,7 @@ def main(argv=None, stdout=None, stdin=None):
|
||||
'version': args.pdf_version,
|
||||
'forms': args.pdf_forms,
|
||||
'custom_metadata': args.custom_metadata,
|
||||
'image_cache': args.cache_folder,
|
||||
}
|
||||
|
||||
# Default to logging to stderr.
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
import functools
|
||||
import io
|
||||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
|
||||
from . import CSS
|
||||
from .anchors import gather_anchors, make_page_bookmark_tree
|
||||
@ -158,6 +160,52 @@ class DocumentMetadata:
|
||||
self.custom = custom or {}
|
||||
|
||||
|
||||
class DiskCache:
    """Dict-like storing images content on disk.

    Bytestring values are stored on disk, in files named after the MD5 digest
    of their key. Other Python objects (i.e. RasterImage instances) are still
    stored in memory, but are much more lightweight.

    Keys are expected to be strings, as they are encoded to build file names.

    Files written through this cache are deleted when the cache object is
    finalized, and the folder is removed too if nothing else is left in it.

    """
    def __init__(self, folder):
        """Create the cache, and its folder (including parents) if needed.

        :param folder: Path of the folder where bytestrings are stored.

        """
        self._path = Path(folder)
        self._path.mkdir(parents=True, exist_ok=True)
        # Values that are not bytestrings, indexed by key.
        self._memory_cache = {}
        # Paths of the files written by this instance, removed in __del__.
        self._disk_paths = set()

    def _path_from_key(self, key):
        """Return the path of the file storing the value set for ``key``."""
        # Hash the key to get a fixed-length name that is safe to use as a
        # file name, whatever characters the key contains.
        return self._path / md5(key.encode()).hexdigest()

    def __getitem__(self, key):
        """Return the value stored for ``key``.

        :raises KeyError: If ``key`` is neither in memory nor on disk.

        """
        if key in self._memory_cache:
            return self._memory_cache[key]
        try:
            return self._path_from_key(key).read_bytes()
        except FileNotFoundError:
            # Behave like a dictionary for missing keys instead of leaking
            # the underlying filesystem error.
            raise KeyError(key) from None

    def __setitem__(self, key, value):
        """Store ``value``: on disk for bytestrings, in memory otherwise."""
        if isinstance(value, bytes):
            path = self._path_from_key(key)
            self._disk_paths.add(path)
            path.write_bytes(value)
            # Memory has priority over disk when reading, drop any previous
            # in-memory value so that it does not shadow the new bytestring.
            self._memory_cache.pop(key, None)
        else:
            self._memory_cache[key] = value

    def __contains__(self, key):
        """Return whether a value is stored for ``key``."""
        return (
            key in self._memory_cache or
            self._path_from_key(key).exists())

    def __del__(self):
        """Remove the files written by this cache, then its folder."""
        try:
            for path in self._disk_paths:
                path.unlink(missing_ok=True)
            # Only succeeds when the folder is empty, files that have not
            # been written by this instance are thus kept.
            self._path.rmdir()
        except Exception:
            # Silently ignore errors while clearing cache
            pass
|
||||
|
||||
|
||||
class Document:
|
||||
"""A rendered document ready to be painted in a pydyf stream.
|
||||
|
||||
@ -180,7 +228,10 @@ class Document:
|
||||
target_collector = TargetCollector()
|
||||
page_rules = []
|
||||
user_stylesheets = []
|
||||
image_cache = {} if image_cache is None else image_cache
|
||||
if image_cache is None:
|
||||
image_cache = {}
|
||||
elif not isinstance(image_cache, DiskCache):
|
||||
image_cache = DiskCache(image_cache)
|
||||
for css in stylesheets or []:
|
||||
if not hasattr(css, 'matcher'):
|
||||
css = CSS(
|
||||
|
@ -36,9 +36,9 @@ class ImageLoadingError(ValueError):
|
||||
|
||||
|
||||
class RasterImage:
|
||||
def __init__(self, pillow_image, image_id, optimize_size, cache_path=None):
|
||||
def __init__(self, pillow_image, image_id, optimize_size, cache):
|
||||
self.id = image_id
|
||||
self._cache_path = cache_path
|
||||
self._cache = cache
|
||||
|
||||
if 'transparency' in pillow_image.info:
|
||||
pillow_image = pillow_image.convert('RGBA')
|
||||
@ -92,7 +92,7 @@ class RasterImage:
|
||||
alpha = pillow_image.getchannel('A')
|
||||
pillow_image = pillow_image.convert(pillow_image.mode[:-1])
|
||||
alpha_data = self._get_png_data(alpha, optimize)
|
||||
stream = self.get_stream(alpha_data)
|
||||
stream = self.get_stream(alpha_data, alpha=True)
|
||||
self.extra['SMask'] = pydyf.Stream(stream, extra={
|
||||
'Filter': '/FlateDecode',
|
||||
'Type': '/XObject',
|
||||
@ -151,20 +151,20 @@ class RasterImage:
|
||||
return b''.join(png_data)
|
||||
|
||||
def get_stream(self, data, alpha=False):
|
||||
if self._cache_path:
|
||||
path = self._cache_path / f'{self.id}{int(alpha)}'
|
||||
path.write_bytes(data)
|
||||
return [LazyImage(path)]
|
||||
else:
|
||||
return [data]
|
||||
key = f'{self.id}{int(alpha)}'
|
||||
return [LazyImage(self._cache, key, data)]
|
||||
|
||||
|
||||
class LazyImage:
|
||||
def __init__(self, path):
|
||||
self._path = path
|
||||
class LazyImage(pydyf.Object):
|
||||
def __init__(self, cache, key, data):
|
||||
super().__init__()
|
||||
self._key = key
|
||||
self._cache = cache
|
||||
cache[key] = data
|
||||
|
||||
def __bytes__(self):
|
||||
self._path.read_bytes()
|
||||
@property
|
||||
def data(self):
|
||||
return self._cache[self._key]
|
||||
|
||||
|
||||
class SVGImage:
|
||||
@ -240,13 +240,14 @@ def get_image_from_uri(cache, url_fetcher, optimize_size, url,
|
||||
else:
|
||||
# Store image id to enable cache in Stream.add_image
|
||||
image_id = md5(url.encode()).hexdigest()
|
||||
# Keep image format as it is discarded by transposition
|
||||
pillow_image = rotate_pillow_image(pillow_image, orientation)
|
||||
image = RasterImage(pillow_image, image_id, optimize_size)
|
||||
image = RasterImage(
|
||||
pillow_image, image_id, optimize_size, cache)
|
||||
|
||||
except (URLFetchingError, ImageLoadingError) as exception:
|
||||
LOGGER.error('Failed to load image at %r: %s', url, exception)
|
||||
image = None
|
||||
|
||||
cache[url] = image
|
||||
return image
|
||||
|
||||
@ -269,6 +270,8 @@ def rotate_pillow_image(pillow_image, orientation):
|
||||
if flip:
|
||||
pillow_image = pillow_image.transpose(
|
||||
Image.Transpose.FLIP_LEFT_RIGHT)
|
||||
|
||||
# Keep image format as it is discarded by transposition
|
||||
pillow_image.format = image_format
|
||||
return pillow_image
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user