mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-09-11 20:47:56 +03:00
Only add AF values for PDF/A-2+
This commit is contained in:
parent
05a34e7d9b
commit
cec6e5ec3e
@ -6,8 +6,8 @@ importing sub-modules.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import datetime
|
||||
import os
|
||||
from datetime import datetime
|
||||
from os.path import getctime, getmtime
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin
|
||||
|
||||
@ -310,46 +310,46 @@ class Attachment:
|
||||
|
||||
An instance is created in the same way as :class:`HTML`, except that the
|
||||
HTML specific arguments (``encoding`` and ``media_type``) are not
|
||||
supported. An optional description can be provided with the ``description``
|
||||
argument.
|
||||
supported.
|
||||
|
||||
:param description:
|
||||
:param str description:
|
||||
A description of the attachment to be included in the PDF document.
|
||||
May be :obj:`None`.
|
||||
:type created: :obj:`datetime.datetime`
|
||||
:param created:
|
||||
Creation date and time. Default is current date and time.
|
||||
:type modified: :obj:`datetime.datetime`
|
||||
:param modified:
|
||||
Modification date and time. Default is current date and time.
|
||||
:param str relationship:
|
||||
A string that represents the relationship between the attachment and
|
||||
the PDF it is embedded in. Default is 'Unspecified', other common
|
||||
values are defined in ISO-32000-2:2020, 7.11.3.
|
||||
|
||||
"""
|
||||
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
|
||||
string=None, base_url=None, url_fetcher=default_url_fetcher,
|
||||
description=None, created=None, modified=None,
|
||||
af_relationship="Source"):
|
||||
relationship='Unspecified'):
|
||||
self.source = _select_source(
|
||||
guess, filename, url, file_obj, string, base_url=base_url,
|
||||
url_fetcher=url_fetcher)
|
||||
self.description = description
|
||||
self.af_relationship = af_relationship
|
||||
self.relationship = relationship
|
||||
self.md5 = None
|
||||
|
||||
def epoch_to_pdf(epoch):
|
||||
dt_object = datetime.datetime.fromtimestamp(epoch)
|
||||
return datetime_to_pdf(dt_object)
|
||||
|
||||
def datetime_to_pdf(dt_object):
|
||||
return dt_object.strftime("D:%Y%m%d%H%M%SZ")
|
||||
|
||||
if created:
|
||||
self.created = created
|
||||
else:
|
||||
if created is None:
|
||||
if filename:
|
||||
self.created = epoch_to_pdf(os.path.getctime(filename))
|
||||
created = datetime.fromtimestamp(getctime(filename))
|
||||
else:
|
||||
self.created = datetime_to_pdf(datetime.datetime.now())
|
||||
|
||||
if modified:
|
||||
self.modified = modified
|
||||
else:
|
||||
created = datetime.now()
|
||||
if modified is None:
|
||||
if filename:
|
||||
self.modified = epoch_to_pdf(os.path.getmtime(filename))
|
||||
modified = datetime.fromtimestamp(getmtime(filename))
|
||||
else:
|
||||
self.modified = datetime_to_pdf(datetime.datetime.now())
|
||||
modified = datetime.now()
|
||||
self.created = created.strftime('D:%Y%m%d%H%M%SZ')
|
||||
self.modified = modified.strftime('D:%Y%m%d%H%M%SZ')
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
|
@ -254,12 +254,9 @@ def generate_pdf(document, target, zoom, **options):
|
||||
pdf_attachments.append(pdf_attachment)
|
||||
if pdf_attachments:
|
||||
content = pydyf.Dictionary({'Names': pydyf.Array()})
|
||||
if 'AF' not in pdf.catalog:
|
||||
pdf.catalog['AF'] = pydyf.Array()
|
||||
for i, pdf_attachment in enumerate(pdf_attachments):
|
||||
content['Names'].append(pydyf.String(f'attachment{i}'))
|
||||
content['Names'].append(pdf_attachment.reference)
|
||||
pdf.catalog['AF'].append(pdf_attachment.reference)
|
||||
pdf.add_object(content)
|
||||
if 'Names' not in pdf.catalog:
|
||||
pdf.catalog['Names'] = pydyf.Dictionary()
|
||||
|
@ -293,58 +293,54 @@ def write_pdf_attachment(pdf, attachment, url_fetcher, compress):
|
||||
elif not isinstance(attachment, Attachment):
|
||||
attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)
|
||||
|
||||
uncompressed_length = 0
|
||||
stream = b''
|
||||
md5 = hashlib.md5()
|
||||
try:
|
||||
with attachment.source as (source_type, source, url, _):
|
||||
with attachment.source as (_, source, url, _):
|
||||
if isinstance(source, bytes):
|
||||
source = io.BytesIO(source)
|
||||
uncompressed_length = 0
|
||||
stream = b''
|
||||
md5 = hashlib.md5()
|
||||
for data in iter(lambda: source.read(4096), b''):
|
||||
uncompressed_length += len(data)
|
||||
md5.update(data)
|
||||
stream += data
|
||||
mime_type, _ = mimetypes.guess_type(url, strict=False)
|
||||
if not mime_type:
|
||||
mime_type = 'application/octet-stream'
|
||||
mime_type = '/' + mime_type.replace('/', '#2f')
|
||||
file_extra = pydyf.Dictionary({
|
||||
'Type': '/EmbeddedFile',
|
||||
"Subtype": mime_type,
|
||||
'Params': pydyf.Dictionary({
|
||||
'CheckSum': f'<{md5.hexdigest()}>',
|
||||
'Size': uncompressed_length,
|
||||
'CreationDate': attachment.created,
|
||||
'ModDate': attachment.modified,
|
||||
})
|
||||
})
|
||||
file_stream = pydyf.Stream([stream], file_extra, compress=compress)
|
||||
pdf.add_object(file_stream)
|
||||
|
||||
except URLFetchingError as exception:
|
||||
LOGGER.error('Failed to load attachment: %s', exception)
|
||||
return
|
||||
attachment.md5 = md5.hexdigest()
|
||||
|
||||
# TODO: Use the result object from a URL fetch operation to provide more
|
||||
# details on the possible filename.
|
||||
# details on the possible filename and MIME type.
|
||||
if url and urlsplit(url).path:
|
||||
filename = basename(unquote(urlsplit(url).path))
|
||||
else:
|
||||
filename = 'attachment.bin'
|
||||
mime_type = mimetypes.guess_type(filename, strict=False)[0]
|
||||
if not mime_type:
|
||||
mime_type = 'application/octet-stream'
|
||||
|
||||
attachment = pydyf.Dictionary({
|
||||
file_extra = pydyf.Dictionary({
|
||||
'Type': '/EmbeddedFile',
|
||||
'Subtype': f'/{mime_type.replace("/", "#2f")}',
|
||||
'Params': pydyf.Dictionary({
|
||||
'CheckSum': f'<{attachment.md5}>',
|
||||
'Size': uncompressed_length,
|
||||
'CreationDate': attachment.created,
|
||||
'ModDate': attachment.modified,
|
||||
})
|
||||
})
|
||||
file_stream = pydyf.Stream([stream], file_extra, compress=compress)
|
||||
pdf.add_object(file_stream)
|
||||
|
||||
pdf_attachment = pydyf.Dictionary({
|
||||
'Type': '/Filespec',
|
||||
'F': pydyf.String(),
|
||||
'UF': pydyf.String(filename),
|
||||
"AFRelationship": "/"+attachment.af_relationship,
|
||||
'EF': pydyf.Dictionary({'F': file_stream.reference}),
|
||||
'Desc': pydyf.String(attachment.description or ''),
|
||||
})
|
||||
pdf.add_object(attachment)
|
||||
if "AF" not in pdf.catalog:
|
||||
pdf.catalog["AF"] = pydyf.Array()
|
||||
pdf.catalog["AF"].append(attachment.reference)
|
||||
return attachment
|
||||
pdf.add_object(pdf_attachment)
|
||||
return pdf_attachment
|
||||
|
||||
|
||||
def resolve_links(pages):
|
||||
|
@ -34,6 +34,30 @@ def pdfa(pdf, metadata, document, page_streams, compress, version):
|
||||
}),
|
||||
])
|
||||
|
||||
# Add AF for attachments
|
||||
if version >= 2:
|
||||
attachments = []
|
||||
if 'Names' in pdf.catalog and 'EmbeddedFiles' in pdf.catalog['Names']:
|
||||
reference = int(pdf.catalog['Names']['EmbeddedFiles'].split()[0])
|
||||
names = pdf.objects[reference]
|
||||
for name in names[1::2]:
|
||||
attachments.append(name)
|
||||
relationships = {
|
||||
attachment.md5: attachment.relationship
|
||||
for attachment in document.metadata.attachments
|
||||
if attachment.md5}
|
||||
for pdf_object in pdf.objects:
|
||||
if isinstance(pdf_object, dict):
|
||||
if pdf_object.get('Type') == '/Filespec':
|
||||
checksum = pdf_object['CheckSum']
|
||||
relationship = relationships.get(checksum, 'Unspecified')
|
||||
pdf_object['AFRelationship'] = f'/{relationship}'
|
||||
attachments.append(pdf_object.reference)
|
||||
if attachments:
|
||||
if 'AF' not in pdf.catalog:
|
||||
pdf.catalog['AF'] = pydyf.Array()
|
||||
pdf.catalog['AF'].extend(attachments)
|
||||
|
||||
# Print annotations
|
||||
for pdf_object in pdf.objects:
|
||||
if isinstance(pdf_object, dict) and pdf_object.get('Type') == '/Annot':
|
||||
|
Loading…
Reference in New Issue
Block a user