mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-10-05 00:21:15 +03:00
Refactor PDF dictionaries.
This commit is contained in:
parent
6e7ef6b62a
commit
6498c3fb00
@ -38,13 +38,6 @@ from . import VERSION_STRING
|
||||
from .compat import xrange, iteritems
|
||||
|
||||
|
||||
TRAILER_RE = re.compile(b'\ntrailer\n(.+)\nstartxref\n(\d+)\n%%EOF\n$',
|
||||
re.DOTALL)
|
||||
|
||||
# No end delimiter, + defaults to greedy
|
||||
DICT_TYPE_RE = re.compile(b'/Type /(\w+)')
|
||||
|
||||
|
||||
class PDFDictionary(object):
|
||||
def __init__(self, object_number, byte_string):
|
||||
self.object_number = object_number
|
||||
@ -54,35 +47,23 @@ class PDFDictionary(object):
|
||||
return self.__class__.__name__ + repr(
|
||||
(self.object_number, self.byte_string))
|
||||
|
||||
def get_type(self):
|
||||
return DICT_TYPE_RE.search(self.byte_string).group(1).decode('ascii')
|
||||
_re_cache = {}
|
||||
|
||||
# __cache is a shared mutable, not an actual parameter.
|
||||
def get_indirect_object_number(self, key, __cache={}):
|
||||
"""Read the value for `key`, assuming it is an indirect object.
|
||||
|
||||
:return: (int) the object number
|
||||
|
||||
"""
|
||||
regex = __cache.get(key)
|
||||
def _get_value(self, key, value_re):
|
||||
regex = self._re_cache.get((key, value_re))
|
||||
if not regex:
|
||||
regex = re.compile(('/%s (\d+) 0 R' % key).encode('ascii'))
|
||||
__cache[key] = regex
|
||||
return int(regex.search(self.byte_string).group(1))
|
||||
|
||||
# __cache is a shared mutable, not an actual parameter.
|
||||
def get_array(self, key, __cache={}):
|
||||
"""Read the value for `key`, assuming it is an array.
|
||||
|
||||
:return: (bytes) the unparsed array content.
|
||||
|
||||
"""
|
||||
regex = __cache.get(key)
|
||||
if not regex:
|
||||
regex = re.compile(('/%s \[([^\]]+)\]' % key).encode('ascii'))
|
||||
__cache[key] = regex
|
||||
regex = re.compile('/{} {}'.format(key, value_re).encode('ascii'))
|
||||
self._re_cache[key, value_re] = regex
|
||||
return regex.search(self.byte_string).group(1)
|
||||
|
||||
def get_type(self):
|
||||
"""
|
||||
:returns: the value for the /Type key.
|
||||
|
||||
"""
|
||||
# No end delimiter, + defaults to greedy
|
||||
return self._get_value('Type', '/(\w+)').decode('ascii')
|
||||
|
||||
def get_indirect_dict(self, key, pdf_file):
|
||||
"""Read the value for `key` and follow the reference, assuming
|
||||
it is an indirect dictionary object.
|
||||
@ -90,7 +71,7 @@ class PDFDictionary(object):
|
||||
:return: a new PDFDictionary instance.
|
||||
|
||||
"""
|
||||
object_number = self.get_indirect_object_number(key)
|
||||
object_number = int(self._get_value(key, '(\d+) 0 R'))
|
||||
return type(self)(object_number, pdf_file.read_object(object_number))
|
||||
|
||||
def get_indirect_dict_array(self, key, pdf_file):
|
||||
@ -100,7 +81,7 @@ class PDFDictionary(object):
|
||||
:return: a list of new PDFDictionary instance.
|
||||
|
||||
"""
|
||||
parts = self.get_array(key).split(b' 0 R')
|
||||
parts = self._get_value(key, '\[([^\]]+)\]').split(b' 0 R')
|
||||
# The array looks like this: ' <a> 0 R <b> 0 R <c> 0 R '
|
||||
# so `parts` ends up like this [' <a>', ' <b>', ' <c>', ' ']
|
||||
# With the trailing white space in the list.
|
||||
@ -117,11 +98,14 @@ class PDFFile(object):
|
||||
A seekable binary file-like object for a PDF generated by cairo.
|
||||
|
||||
"""
|
||||
trailer_re = re.compile(
|
||||
b'\ntrailer\n(.+)\nstartxref\n(\d+)\n%%EOF\n$', re.DOTALL)
|
||||
|
||||
def __init__(self, fileobj):
|
||||
# cairo’s trailer only has Size, Root and Info.
|
||||
# The trailer + startxref + EOF is typically under 100 bytes
|
||||
fileobj.seek(-200, os.SEEK_END)
|
||||
trailer, startxref = TRAILER_RE.search(fileobj.read()).groups()
|
||||
trailer, startxref = self.trailer_re.search(fileobj.read()).groups()
|
||||
trailer = PDFDictionary(None, trailer)
|
||||
startxref = int(startxref)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user