1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-04 16:07:57 +03:00

Change the way filenames are managed

This commit is contained in:
Guillaume Ayoub 2016-08-16 22:15:18 +02:00
parent b94546079d
commit 59e4fb5146
3 changed files with 24 additions and 21 deletions

View File

@ -12,6 +12,7 @@
from __future__ import division, unicode_literals
import codecs
import sys
import email
@ -24,6 +25,15 @@ __all__ = ['Request', 'base64_decode', 'base64_encode', 'basestring',
'urlparse_uses_relative', 'urlsplit', 'xrange']
# The value reported by sys.getfilesystemencoding() is not always usable
# (the original note singles out Linux). Fall back to UTF-8 when it is
# empty, when it resolves to plain ASCII, or when the codec is unknown.
FILESYSTEM_ENCODING = sys.getfilesystemencoding() or 'utf-8'
try:
    FILESYSTEM_ENCODING = (
        'utf-8' if codecs.lookup(FILESYSTEM_ENCODING).name == 'ascii'
        else FILESYSTEM_ENCODING)
except LookupError:
    # The reported name does not match any registered codec.
    FILESYSTEM_ENCODING = 'utf-8'
if sys.version_info[0] >= 3:
# Python 3
from urllib.parse import (
@ -63,7 +73,7 @@ else:
from urlparse import (urljoin, urlsplit, parse_qs,
uses_relative as urlparse_uses_relative)
from urllib2 import urlopen, Request
from urllib import pathname2url, quote, unquote, urlencode
from urllib import pathname2url as _pathname2url, quote, unquote, urlencode
from array import array as _array
from itertools import izip, imap
from base64 import (decodestring as base64_decode,
@ -77,6 +87,11 @@ else:
def array(typecode, initializer):
    """Build an ``array.array`` from a text *typecode* and *initializer*.

    The typecode is encoded to an ASCII byte string before being handed to
    the underlying ``array`` constructor (imported here as ``_array``).
    """
    ascii_typecode = typecode.encode('ascii')
    return _array(ascii_typecode, initializer)
def pathname2url(path):
    """Convert a local *path* to the path component of a URL.

    ``unicode`` paths are first encoded with ``FILESYSTEM_ENCODING``; any
    other value is passed through unchanged. The conversion itself is
    delegated to urllib's ``pathname2url`` (imported as ``_pathname2url``).
    """
    encoded_path = (
        path.encode(FILESYSTEM_ENCODING) if isinstance(path, unicode)
        else path)
    return _pathname2url(encoded_path)
def urllib_get_content_type(urlobj):
    """Return the content type given by the headers of *urlobj*.

    *urlobj* must expose ``info()``, whose result provides ``gettype()``.
    """
    headers = urlobj.info()
    return headers.gettype()

View File

@ -11,7 +11,6 @@
from __future__ import division, unicode_literals
import io
import sys
import math
import shutil
import functools
@ -28,8 +27,7 @@ from .layout import layout_document
from .layout.backgrounds import percentage
from .draw import draw_page, stacked
from .pdf import write_pdf_metadata
from .compat import izip, iteritems, unicode
from .urls import FILESYSTEM_ENCODING
from .compat import izip, iteritems
def _get_matrix(box):
@ -551,9 +549,6 @@ class Document(object):
surface.write_to_png(target)
png_bytes = target.getvalue()
else:
if sys.version_info[0] < 3 and isinstance(target, unicode):
# py2cairo 1.8 does not support unicode filenames.
target = target.encode(FILESYSTEM_ENCODING)
surface.write_to_png(target)
png_bytes = None
return png_bytes, max_width, sum_heights

View File

@ -15,7 +15,6 @@ from __future__ import division, unicode_literals
import io
import re
import sys
import codecs
import os.path
import mimetypes
import contextlib
@ -40,15 +39,6 @@ else:
mimetypes.add_type(b'image/svg+xml', b'.svg')
# getfilesystemencoding() on Linux is sometimes stupid...
FILESYSTEM_ENCODING = sys.getfilesystemencoding() or 'utf-8'
try:
if codecs.lookup(FILESYSTEM_ENCODING).name == 'ascii':
FILESYSTEM_ENCODING = 'utf-8'
except LookupError:
FILESYSTEM_ENCODING = 'utf-8'
# See http://stackoverflow.com/a/11687993/1162888
# Both are needed in Python 3 as the re module does not like to mix
# http://tools.ietf.org/html/rfc3986#section-3.1
@ -64,8 +54,7 @@ def iri_to_uri(url):
# Data URIs can be huge, but don't need this anyway.
return url
# Use UTF-8 as per RFC 3987 (IRI), except for file://
url = url.encode(FILESYSTEM_ENCODING
if url.startswith('file:') else 'utf-8')
url = url.encode('utf-8')
# This is a full URI, not just a component. Only %-encode characters
# that are not allowed at all in URIs. Everything else is "safe":
# * Reserved characters: /:?#[]@!$&'()*+,;=
@ -82,8 +71,6 @@ def path2url(path):
# Make sure directory names have a trailing slash.
# Otherwise relative URIs are resolved from the parent directory.
path += os.path.sep
if isinstance(path, unicode):
path = path.encode(FILESYSTEM_ENCODING)
path = pathname2url(path)
if path.startswith('///'):
# On Windows pathname2url(r'C:\foo') is apparently '///C:/foo'
@ -271,6 +258,12 @@ def default_url_fetcher(url):
"""
if url.lower().startswith('data:'):
return open_data_url(url)
elif url.lower().startswith('file:'):
filename = unquote(url[7:]) # len('file://') == 7
mime_type = mimetypes.guess_type(filename)[0]
with open(filename, 'rb') as fd:
return dict(filename=filename, string=fd.read(),
mime_type=mime_type, redirected_url=url)
elif UNICODE_SCHEME_RE.match(url):
url = iri_to_uri(url)
response = urlopen(Request(url, headers=HTTP_HEADERS))