1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 00:21:15 +03:00
WeasyPrint/weasyprint/text.py

437 lines
16 KiB
Python
Raw Normal View History

# coding: utf8
"""
    weasyprint.text
    ---------------

    Interface with Pango to decide where to do line breaks and to draw text.

    :copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""
from __future__ import division
# XXX No unicode_literals, cffi likes native strings
2011-09-30 13:54:56 +04:00
from cgi import escape
import pyphen
import cffi
import cairocffi as cairo
from .compat import xrange, basestring
2012-12-26 16:25:18 +04:00
# C declarations for the parts of GLib, Pango and PangoCairo used by this
# module. Only what is actually called below is declared; types whose
# content is never accessed from Python are declared opaque with
# ``typedef ... name;``.
#
# Enum constants are declared without a ``PANGO_*_`` prefix: the functions
# taking these enums are called with plain strings such as 'WRAP_WORD' or
# the result of to_enum().
# NOTE(review): ``NORMAL`` is declared in four different enums with
# different values -- confirm that the cffi version in use accepts this.
ffi = cffi.FFI()
ffi.cdef('''
    typedef enum {
        NORMAL,
        OBLIQUE,
        ITALIC
    } PangoStyle;

    typedef enum {
        THIN = 100,
        ULTRALIGHT = 200,
        LIGHT = 300,
        BOOK = 380,
        NORMAL = 400,
        MEDIUM = 500,
        SEMIBOLD = 600,
        BOLD = 700,
        ULTRABOLD = 800,
        HEAVY = 900,
        ULTRAHEAVY = 1000
    } PangoWeight;

    typedef enum {
        NORMAL,
        SMALL_CAPS
    } PangoVariant;

    typedef enum {
        ULTRA_CONDENSED,
        EXTRA_CONDENSED,
        CONDENSED,
        SEMI_CONDENSED,
        NORMAL,
        SEMI_EXPANDED,
        EXPANDED,
        EXTRA_EXPANDED,
        ULTRA_EXPANDED
    } PangoStretch;

    typedef enum {
        WRAP_WORD,
        WRAP_CHAR,
        WRAP_WORD_CHAR
    } PangoWrapMode;

    typedef unsigned int guint;
    typedef int gint;
    typedef gint gboolean;
    typedef void* gpointer;
    typedef ... cairo_t;
    typedef ... PangoLayout;
    typedef ... PangoFontDescription;
    typedef ... PangoLayoutIter;
    typedef ... PangoAttrList;
    typedef ... PangoAttrClass;
    typedef struct {
        const PangoAttrClass *klass;
        guint start_index;
        guint end_index;
    } PangoAttribute;
    typedef struct {
        PangoLayout *layout;
        gint start_index;
        gint length;
        /* ... */
    } PangoLayoutLine;

    double pango_units_to_double (int i);
    int pango_units_from_double (double d);
    void g_object_unref (gpointer object);

    PangoLayout * pango_cairo_create_layout (cairo_t *cr);
    void pango_layout_set_wrap (PangoLayout *layout, PangoWrapMode wrap);
    void pango_layout_set_width (PangoLayout *layout, int width);
    void pango_layout_set_attributes(PangoLayout *layout, PangoAttrList *attrs);
    void pango_layout_set_text (
        PangoLayout *layout, const char *text, int length);
    void pango_layout_set_font_description (
        PangoLayout *layout, const PangoFontDescription *desc);

    PangoFontDescription * pango_font_description_new (void);
    void pango_font_description_free (PangoFontDescription *desc);
    void pango_font_description_set_family (
        PangoFontDescription *desc, const char *family);
    void pango_font_description_set_variant (
        PangoFontDescription *desc, PangoVariant variant);
    void pango_font_description_set_style (
        PangoFontDescription *desc, PangoStyle style);
    void pango_font_description_set_stretch (
        PangoFontDescription *desc, PangoStretch stretch);
    void pango_font_description_set_weight (
        PangoFontDescription *desc, PangoWeight weight);
    void pango_font_description_set_absolute_size (
        PangoFontDescription *desc, double size);

    PangoAttrList * pango_attr_list_new (void);
    void pango_attr_list_unref (PangoAttrList *list);
    void pango_attr_list_insert (
        PangoAttrList *list, PangoAttribute *attr);
    PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing);
    void pango_attribute_destroy (PangoAttribute *attr);

    PangoLayoutIter * pango_layout_get_iter (PangoLayout *layout);
    void pango_layout_iter_free (PangoLayoutIter *iter);
    gboolean pango_layout_iter_next_line (PangoLayoutIter *iter);
    PangoLayoutLine * pango_layout_iter_get_line_readonly (
        PangoLayoutIter *iter);
    int pango_layout_iter_get_baseline (PangoLayoutIter *iter);

    typedef struct {
        int x;
        int y;
        int width;
        int height;
    } PangoRectangle;
    void pango_layout_line_get_extents (
        PangoLayoutLine *line,
        PangoRectangle *ink_rect, PangoRectangle *logical_rect);

    void pango_cairo_update_layout (cairo_t *cr, PangoLayout *layout);
    void pango_cairo_show_layout_line (cairo_t *cr, PangoLayoutLine *line);
''')
def dlopen(ffi, *names):
    """Open a shared library that may go by several names.

    Each name in ``names`` is passed to ``ffi.dlopen`` in order and the
    first library that loads is returned. Names that raise ``OSError``
    are skipped.

    If none of the names works, the first one is opened once more so that
    the error from ``ffi.dlopen`` reaches the caller.

    """
    for candidate in names:
        try:
            library = ffi.dlopen(candidate)
        except OSError:
            continue
        else:
            return library
    # Nothing could be loaded: let the failure propagate.
    return ffi.dlopen(names[0])  # pragma: no cover
2012-05-31 03:14:15 +04:00
# Handles on the shared libraries. For each library two names are tried in
# order: the short name, then a "lib...-0" variant.
gobject = dlopen(ffi, 'gobject-2.0', 'libgobject-2.0-0')
pango = dlopen(ffi, 'pango-1.0', 'libpango-1.0-0')
pangocairo = dlopen(ffi, 'pangocairo-1.0', 'libpangocairo-1.0-0')
# Short aliases for the Pango unit conversion functions.
units_to_double = pango.pango_units_to_double
units_from_double = pango.pango_units_from_double
# pyphen.Pyphen hyphenation dictionaries, keyed by language, filled lazily.
PYPHEN_DICTIONARY_CACHE = {}
def to_enum(string):
    """Turn a keyword such as ``'small-caps'`` into ``'SMALL_CAPS'``.

    The keyword is upper-cased, its hyphens become underscores, and the
    result is converted to a native ``str``.

    """
    enum_name = string.upper().replace('-', '_')
    return str(enum_name)
def unicode_to_char_p(string):
    """Encode ``string`` as UTF-8 and wrap it in a cffi ``char[]``.

    NUL bytes are removed from the encoded text.

    Return a ``(char_array, bytestring)`` tuple: the cffi ``char[]``
    object and the byte string it was built from.

    """
    encoded = string.encode('utf8')
    bytestring = encoded.replace(b'\x00', b'')
    char_array = ffi.new('char[]', bytestring)
    return char_array, bytestring
def get_size(line):
    """Return the logical ``(width, height)`` of a Pango layout line.

    Both values are converted from Pango units with
    ``pango_units_to_double``.

    """
    extents = ffi.new('PangoRectangle *')
    # Only the logical rectangle is requested; NULL is passed for the ink one.
    pango.pango_layout_line_get_extents(line, ffi.NULL, extents)
    width = units_to_double(extents.width)
    height = units_to_double(extents.height)
    return width, height
class Layout(object):
    """Container for a PangoLayout and the cdata objects that go with it.

    The class defines no ``__init__``: the ``layout`` attribute read by
    the methods below has to be assigned by whoever creates the instance.

    """

    def iter_lines(self):
        """Yield each line of the layout as a ``PangoLayoutLine *``.

        At least one line is always yielded. The underlying
        PangoLayoutIter is released with ``pango_layout_iter_free`` when
        it gets garbage-collected.

        """
        line_iter = ffi.gc(
            pango.pango_layout_get_iter(self.layout),
            pango.pango_layout_iter_free)
        yield pango.pango_layout_iter_get_line_readonly(line_iter)
        while pango.pango_layout_iter_next_line(line_iter):
            yield pango.pango_layout_iter_get_line_readonly(line_iter)

    def set_text(self, text):
        """Replace the text of the layout with the Unicode string ``text``.

        The cffi ``char[]`` and the UTF-8 byte string are stored on the
        instance as ``text`` and ``text_bytes``.

        """
        self.text, self.text_bytes = unicode_to_char_p(text)
        # -1 as the length: the text is read up to its NUL terminator.
        pango.pango_layout_set_text(self.layout, self.text, -1)
def create_layout(text, style, hinting, max_width):
    """Build a :class:`Layout` for ``text`` with default Pango line-breaks.

    :param text: Unicode
    :param style: a :class:`StyleDict` of computed values
    :param hinting: whether to enable text hinting or not
    :param max_width:
        The maximum available width in the same unit as ``style.font_size``,
        or ``None`` for unlimited width.
    :returns:
        A :class:`Layout` whose attributes hold the Pango layout, the font
        description, the cairo context and the encoded text.

    """
    result = Layout()

    # Pango needs a cairo context to create the layout: an image surface is
    # used with hinting, a PDF surface without.
    if hinting:
        surface = cairo.ImageSurface('ARGB32', 1, 1)
    else:
        surface = cairo.PDFSurface(None, 1, 1)
    result.dummy_context = cairo.Context(surface)
    context_pointer = ffi.cast('cairo_t *', result.dummy_context._pointer)
    pango_layout = ffi.gc(
        pangocairo.pango_cairo_create_layout(context_pointer),
        gobject.g_object_unref)
    result.layout = pango_layout

    # Font description
    description = ffi.gc(
        pango.pango_font_description_new(),
        pango.pango_font_description_free)
    result.font = description
    assert not isinstance(style.font_family, basestring), (
        'font_family should be a list')
    family, _family_bytes = unicode_to_char_p(','.join(style.font_family))
    result.font_family = family
    pango.pango_font_description_set_family(description, family)
    pango.pango_font_description_set_variant(
        description, to_enum(style.font_variant))
    pango.pango_font_description_set_style(
        description, to_enum(style.font_style))
    pango.pango_font_description_set_stretch(
        description, to_enum(style.font_stretch))
    pango.pango_font_description_set_weight(description, style.font_weight)
    pango.pango_font_description_set_absolute_size(
        description, units_from_double(style.font_size))
    pango.pango_layout_set_font_description(pango_layout, description)

    pango.pango_layout_set_wrap(pango_layout, 'WRAP_WORD')
    result.set_text(text)

    # Make sure that max_width * Pango.SCALE == max_width * 1024 fits in a
    # signed integer. Treat bigger values same as None: unconstrained width.
    if max_width is not None and max_width < 2 ** 21:
        pango.pango_layout_set_width(
            pango_layout, units_from_double(max_width))

    word_spacing = style.word_spacing
    letter_spacing = style.letter_spacing
    if letter_spacing == 'normal':
        letter_spacing = 0
    if not text or (word_spacing == 0 and letter_spacing == 0):
        # No spacing attribute is needed.
        return result

    letter_units = units_from_double(letter_spacing)
    # Spaces get the word spacing on top of the letter spacing.
    space_units = units_from_double(word_spacing) + letter_units
    attributes = pango.pango_attr_list_new()

    def insert_spacing(start, end, spacing):
        """Insert a letter-spacing attribute for bytes [start, end)."""
        attribute = pango.pango_attr_letter_spacing_new(spacing)
        attribute.start_index = start
        attribute.end_index = end
        pango.pango_attr_list_insert(attributes, attribute)

    encoded = result.text_bytes
    insert_spacing(0, len(encoded) + 1, letter_units)
    space_at = encoded.find(b' ')
    while space_at != -1:
        insert_spacing(space_at, space_at + 1, space_units)
        space_at = encoded.find(b' ', space_at + 1)
    pango.pango_layout_set_attributes(pango_layout, attributes)
    pango.pango_attr_list_unref(attributes)
    return result
def split_first_line(text, style, hinting, max_width, line_width):
    """Fit as much as possible in the available width for one line of text.

    :param text: the Unicode text to split
    :param style: a style object of computed values; ``font_size``,
        ``hyphens`` and ``lang`` are read here
    :param hinting: whether to enable text hinting or not
    :param max_width: the available width for the first line; a false value
        (``None`` or 0) disables the width-dependent steps
    :param line_width: only used to compute the hyphenation ratio
        (presumably the width of the whole line -- confirm with callers)

    Return ``(layout, length, resume_at, width, height, baseline)``.

    ``layout``: a pango Layout with the first line
    ``length``: length in UTF-8 bytes of the first line
    ``resume_at``: The number of UTF-8 bytes to skip for the next line.
                   May be ``None`` if the whole text fits in one line.
                   This may be greater than ``length`` in case of preserved
                   newline characters.
    ``width``: width in pixels of the first line
    ``height``: height in pixels of the first line
    ``baseline``: baseline in pixels of the first line

    """
    # Step #1: Get a draft layout with the first line
    layout = None
    # The estimate below divides by the font size and converts the result to
    # an integer: skip it for a zero font size (ZeroDivisionError) and for an
    # infinite width (OverflowError), and lay out the whole text instead.
    if max_width and style.font_size and max_width != float('inf'):
        expected_length = int(max_width / style.font_size * 2.5)
        if expected_length < len(text):
            # Try to use a small amount of text instead of the whole text
            layout = create_layout(
                text[:expected_length], style, hinting, max_width)
            lines = layout.iter_lines()
            first_line = next(lines, None)
            second_line = next(lines, None)
            if second_line is None:
                # The small amount of text fits in one line, give up and use
                # the whole text
                layout = None
    if layout is None:
        layout = create_layout(text, style, hinting, max_width)
        lines = layout.iter_lines()
        first_line = next(lines, None)
        second_line = next(lines, None)
    if second_line is not None:
        resume_at = second_line.start_index
    else:
        resume_at = None

    # Step #2: Build the final layout
    hyphenated = False
    first_line_width, _height = get_size(first_line)
    if max_width and (second_line is not None or first_line_width > max_width):
        if first_line_width <= max_width:
            # The first line may have been cut too early by pango
            second_line_index = second_line.start_index
            # Pango indexes are byte offsets: slice the encoded text.
            text_bytes = text.encode('u8')
            first_part = text_bytes[:second_line_index].decode('u8')
            second_part = text_bytes[second_line_index:].decode('u8')
        else:
            first_part = ''
            second_part = text
        next_word = second_part.split(' ', 1)[0]

        if next_word:
            # next_word might fit without a space afterwards.
            # Pango previously counted that space's advance width.
            new_first_line = first_part + next_word
            layout.set_text(new_first_line)
            lines = layout.iter_lines()
            first_line = next(lines, None)
            second_line = next(lines, None)

            # TODO: find another way to avoid very long lines, hyphenize may
            # only keep the first word by splitting not only with simple spaces
            max_long_line = 50
            hyphens = style.hyphens
            lang = style.lang
            if hyphens in ('none', 'manual') or lang not in pyphen.LANGUAGES:
                hyphens = 0  # No automatic hyphenation
            elif hyphens == 'auto':
                hyphens = 0.9  # Default threshold

            if hyphens > 0:
                first_line_width, _height = get_size(first_line)
                ratio = (
                    (first_line_width + line_width - max_width) / line_width)
            else:
                ratio = 1

            if second_line is None and ratio <= 1:
                # The next word fits in the first line, keep the layout
                resume_at = len(new_first_line.encode('utf-8')) + 1
            elif len(next_word) < max_long_line and (
                    ratio < hyphens or ratio > 1):
                # The next word does not fit, try hyphenation
                dictionary = PYPHEN_DICTIONARY_CACHE.get(lang)
                if dictionary is None:
                    dictionary = pyphen.Pyphen(lang=lang)
                    PYPHEN_DICTIONARY_CACHE[lang] = dictionary
                for first_word_part, _ in dictionary.iterate(next_word):
                    new_first_line = first_part + first_word_part + '-'
                    temp_layout = create_layout(
                        new_first_line, style, hinting, max_width)
                    temp_lines = temp_layout.iter_lines()
                    temp_first_line = next(temp_lines, None)
                    temp_second_line = next(temp_lines, None)
                    temp_first_line_width, _height = get_size(temp_first_line)
                    if (temp_second_line is None and ratio <= 1) or ratio > 1:
                        hyphenated = True
                        # TODO: find why there's no need to .encode
                        # NOTE(review): this counts characters while the
                        # other branches count UTF-8 bytes -- confirm the
                        # result for non-ASCII text.
                        resume_at = len(new_first_line) - 1
                        layout = temp_layout
                        first_line = temp_first_line
                        second_line = temp_second_line
                        if temp_first_line_width <= max_width:
                            # This hyphenation point fits, stop here.
                            break

    # Step #3: We have the right layout, find metrics
    length = first_line.length

    if not hyphenated:
        first_line_text = text.encode('utf-8')[:length].decode('utf-8')
        if first_line_text.endswith(' ') and resume_at:
            # Remove trailing spaces
            layout.set_text(first_line_text.rstrip(' '))
            first_line = next(layout.iter_lines(), None)
            length = first_line.length if first_line is not None else 0

    width, height = get_size(first_line)
    baseline = units_to_double(pango.pango_layout_iter_get_baseline(ffi.gc(
        pango.pango_layout_get_iter(layout.layout),
        pango.pango_layout_iter_free)))

    # Step #4: Return the layout and the metrics
    return layout, length, resume_at, width, height, baseline
2012-07-12 18:10:30 +04:00
def line_widths(box, enable_hinting, width, skip=None):
    """Yield the width of each line of the text of ``box``.

    :param box: an object with ``text`` and ``style`` attributes
    :param enable_hinting: whether to enable text hinting or not
    :param width: the maximum width given to the layout
    :param skip: number of characters of ``box.text`` to ignore at the
        start; ``None`` means none

    """
    start = skip or 0
    # TODO: without the lstrip, we get an extra empty line at the beginning. Is
    # there a better solution to avoid that?
    remaining_text = box.text[start:].lstrip(' ')
    layout = create_layout(remaining_text, box.style, enable_hinting, width)
    for line in layout.iter_lines():
        yield get_size(line)[0]
def show_first_line(cairo_context, pango_layout, hinting):
    """Draw the first line of ``pango_layout`` on ``cairo_context``.

    :param cairo_context: a cairocffi context
    :param pango_layout: a :class:`Layout` object
    :param hinting: when true, the layout is first updated for the
        context with ``pango_cairo_update_layout``

    """
    context_pointer = ffi.cast('cairo_t *', cairo_context._pointer)
    if hinting:
        pangocairo.pango_cairo_update_layout(
            context_pointer, pango_layout.layout)
    # The line is fetched only after the layout has been updated.
    first_line = next(pango_layout.iter_lines())
    pangocairo.pango_cairo_show_layout_line(context_pointer, first_line)