1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 00:21:15 +03:00
WeasyPrint/weasyprint/text.py

443 lines
16 KiB
Python
Raw Normal View History

# coding: utf8
"""
    weasyprint.text
    ---------------

    Interface with Pango to decide where to do line breaks and to draw text.

    :copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""
from __future__ import division
# XXX No unicode_literals, cffi likes native strings
2011-09-30 13:54:56 +04:00
import pyphen
import cffi
import cairocffi as cairo
2013-03-02 04:33:37 +04:00
from .compat import basestring
2012-12-26 16:25:18 +04:00
ffi = cffi.FFI()
# C declarations for the GObject / Pango / PangoCairo types and functions
# that this module calls through cffi.  Only valid C may appear in this
# string: anything else makes ``ffi.cdef`` fail to parse it.
ffi.cdef('''
    typedef enum {
        NORMAL,
        OBLIQUE,
        ITALIC
    } PangoStyle;

    typedef enum {
        THIN = 100,
        ULTRALIGHT = 200,
        LIGHT = 300,
        BOOK = 380,
        NORMAL = 400,
        MEDIUM = 500,
        SEMIBOLD = 600,
        BOLD = 700,
        ULTRABOLD = 800,
        HEAVY = 900,
        ULTRAHEAVY = 1000
    } PangoWeight;

    typedef enum {
        NORMAL,
        SMALL_CAPS
    } PangoVariant;

    typedef enum {
        ULTRA_CONDENSED,
        EXTRA_CONDENSED,
        CONDENSED,
        SEMI_CONDENSED,
        NORMAL,
        SEMI_EXPANDED,
        EXPANDED,
        EXTRA_EXPANDED,
        ULTRA_EXPANDED
    } PangoStretch;

    typedef enum {
        WRAP_WORD,
        WRAP_CHAR,
        WRAP_WORD_CHAR
    } PangoWrapMode;

    typedef unsigned int guint;
    typedef int gint;
    typedef gint gboolean;
    typedef void* gpointer;
    typedef ... cairo_t;
    typedef ... PangoLayout;
    typedef ... PangoFontDescription;
    typedef ... PangoLayoutIter;
    typedef ... PangoAttrList;
    typedef ... PangoAttrClass;
    typedef struct {
        const PangoAttrClass *klass;
        guint start_index;
        guint end_index;
    } PangoAttribute;
    typedef struct {
        PangoLayout *layout;
        gint start_index;
        gint length;
        /* ... */
    } PangoLayoutLine;

    double pango_units_to_double (int i);
    int pango_units_from_double (double d);
    void g_object_unref (gpointer object);

    PangoLayout * pango_cairo_create_layout (cairo_t *cr);
    void pango_layout_set_wrap (PangoLayout *layout, PangoWrapMode wrap);
    void pango_layout_set_width (PangoLayout *layout, int width);
    void pango_layout_set_attributes(PangoLayout *layout, PangoAttrList *attrs);
    void pango_layout_set_text (
        PangoLayout *layout, const char *text, int length);
    void pango_layout_set_font_description (
        PangoLayout *layout, const PangoFontDescription *desc);

    PangoFontDescription * pango_font_description_new (void);
    void pango_font_description_free (PangoFontDescription *desc);
    void pango_font_description_set_family (
        PangoFontDescription *desc, const char *family);
    void pango_font_description_set_variant (
        PangoFontDescription *desc, PangoVariant variant);
    void pango_font_description_set_style (
        PangoFontDescription *desc, PangoStyle style);
    void pango_font_description_set_stretch (
        PangoFontDescription *desc, PangoStretch stretch);
    void pango_font_description_set_weight (
        PangoFontDescription *desc, PangoWeight weight);
    void pango_font_description_set_absolute_size (
        PangoFontDescription *desc, double size);

    PangoAttrList * pango_attr_list_new (void);
    void pango_attr_list_unref (PangoAttrList *list);
    void pango_attr_list_insert (
        PangoAttrList *list, PangoAttribute *attr);
    PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing);
    void pango_attribute_destroy (PangoAttribute *attr);

    PangoLayoutIter * pango_layout_get_iter (PangoLayout *layout);
    void pango_layout_iter_free (PangoLayoutIter *iter);
    gboolean pango_layout_iter_next_line (PangoLayoutIter *iter);
    PangoLayoutLine * pango_layout_iter_get_line_readonly (
        PangoLayoutIter *iter);
    int pango_layout_iter_get_baseline (PangoLayoutIter *iter);

    typedef struct {
        int x;
        int y;
        int width;
        int height;
    } PangoRectangle;
    void pango_layout_line_get_extents (
        PangoLayoutLine *line,
        PangoRectangle *ink_rect, PangoRectangle *logical_rect);

    void pango_cairo_update_layout (cairo_t *cr, PangoLayout *layout);
    void pango_cairo_show_layout_line (cairo_t *cr, PangoLayoutLine *line);
''')
def dlopen(ffi, *names):
    """Open a shared library that may go by several platform-specific names.

    Each name is passed to ``ffi.dlopen`` in turn and the first library that
    loads is returned.  If every name raises ``OSError``, the first name is
    tried once more so that its ``OSError`` propagates to the caller.

    """
    for candidate in names:
        try:
            library = ffi.dlopen(candidate)
        except OSError:
            # This spelling is not available here, try the next one.
            continue
        else:
            return library
    # Nothing loaded: let the error for the first name escape.
    return ffi.dlopen(names[0])  # pragma: no cover
2012-05-31 03:14:15 +04:00
# Shared libraries, each looked up under two alternative names (see dlopen).
gobject = dlopen(ffi, 'gobject-2.0', 'libgobject-2.0-0')
pango = dlopen(ffi, 'pango-1.0', 'libpango-1.0-0')
pangocairo = dlopen(ffi, 'pangocairo-1.0', 'libpangocairo-1.0-0')
# Short aliases for the Pango unit conversion functions.
units_to_double = pango.pango_units_to_double
units_from_double = pango.pango_units_from_double
# Maps a language code to its pyphen.Pyphen instance; filled lazily by
# split_first_line so each dictionary is only built once.
PYPHEN_DICTIONARY_CACHE = {}
def to_enum(string):
    """Turn a dashed keyword into an enumerator name.

    The value is upper-cased, dashes become underscores, and the result is
    converted to a native ``str``.

    """
    upper_cased = string.upper()
    return str(upper_cased.replace('-', '_'))
2013-03-02 04:33:37 +04:00
def utf8_slice(string, slice_):
    """Slice ``string`` using indexes that count UTF-8 bytes.

    The text is encoded to UTF-8, ``slice_`` is applied to the bytes, and the
    selected bytes are decoded back to text.

    """
    encoded = string.encode('utf-8')
    selected = encoded[slice_]
    return selected.decode('utf-8')
def unicode_to_char_p(string):
    """Encode ``string`` for use as a C ``char *`` argument.

    Return ``(char_array, bytestring)``: a new cffi ``char[]`` holding the
    UTF-8 encoded text with null bytes removed, and those same bytes as a
    Python byte string.

    """
    encoded = string.encode('utf8')
    # Drop null bytes from the encoded text before handing it to C.
    bytestring = encoded.replace(b'\x00', b'')
    return ffi.new('char[]', bytestring), bytestring
def get_size(line):
    """Return ``(width, height)`` of the logical extents of a layout line.

    Both values are converted from Pango units with ``units_to_double``.

    """
    extents = ffi.new('PangoRectangle *')
    # Only the logical rectangle is requested; NULL skips the ink rectangle.
    pango.pango_layout_line_get_extents(line, ffi.NULL, extents)
    width = units_to_double(extents.width)
    height = units_to_double(extents.height)
    return width, height
2013-03-02 04:33:37 +04:00
def first_line_metrics(first_line, text, layout, resume_at, hyphenated=False):
    """Assemble the tuple returned by :func:`split_first_line`.

    Unless the line is ``hyphenated``, trailing spaces are removed from the
    first line when some text remains for a next line (``resume_at`` is
    truthy): the layout text is replaced by the stripped line and the line
    is measured again.

    Return ``(layout, length, resume_at, width, height, baseline)``.

    """
    length = first_line.length
    if not hyphenated:
        line_text = utf8_slice(text, slice(length))
        if resume_at and line_text.endswith(' '):
            # Lay out the first line again without its trailing spaces.
            layout.set_text(line_text.rstrip(' '))
            first_line = next(layout.iter_lines(), None)
            length = 0 if first_line is None else first_line.length
    width, height = get_size(first_line)
    # A fresh iterator is needed to read the baseline; it is freed through
    # ffi.gc once it is no longer referenced.
    baseline_iter = ffi.gc(
        pango.pango_layout_get_iter(layout.layout),
        pango.pango_layout_iter_free)
    baseline = units_to_double(
        pango.pango_layout_iter_get_baseline(baseline_iter))
    return layout, length, resume_at, width, height, baseline
class Layout(object):
    """Container for a PangoLayout and the cdata objects that go with it."""

    def iter_lines(self):
        """Yield the lines of the layout, one ``PangoLayoutLine *`` each.

        The first line is always yielded, then iteration goes on for as
        long as ``pango_layout_iter_next_line`` reports another line.

        """
        layout_iter = ffi.gc(
            pango.pango_layout_get_iter(self.layout),
            pango.pango_layout_iter_free)
        has_line = True
        while has_line:
            yield pango.pango_layout_iter_get_line_readonly(layout_iter)
            has_line = pango.pango_layout_iter_next_line(layout_iter)

    def set_text(self, text):
        """Replace the text of the layout with the Unicode string ``text``.

        The encoded ``char[]`` and the matching byte string are stored on
        the instance as ``text`` and ``text_bytes``.

        """
        char_array, bytestring = unicode_to_char_p(text)
        self.text = char_array
        self.text_bytes = bytestring
        # A length of -1 is passed rather than an explicit byte count.
        pango.pango_layout_set_text(self.layout, char_array, -1)
def create_layout(text, style, hinting, max_width):
    """Build a :class:`Layout` for ``text`` with default Pango line-breaks.

    :param text: Unicode
    :param style: a :class:`StyleDict` of computed values
    :param hinting: whether to enable text hinting or not
    :param max_width:
        The maximum available width in the same unit as ``style.font_size``,
        or ``None`` for unlimited width.
    :returns:
        A :class:`Layout` whose font, wrap mode, text, width and spacing
        attributes have been set from ``style``.

    """
    layout_obj = Layout()

    # The layout needs a cairo context; a 1x1 surface is enough for that.
    # Hinting decides which kind of surface backs it.
    if hinting:
        surface = cairo.ImageSurface('ARGB32', 1, 1)
    else:
        surface = cairo.PDFSurface(None, 1, 1)
    dummy_context = cairo.Context(surface)
    layout_obj.dummy_context = dummy_context

    cairo_pointer = ffi.cast('cairo_t *', dummy_context._pointer)
    layout = ffi.gc(
        pangocairo.pango_cairo_create_layout(cairo_pointer),
        gobject.g_object_unref)
    layout_obj.layout = layout

    font = ffi.gc(
        pango.pango_font_description_new(),
        pango.pango_font_description_free)
    layout_obj.font = font

    assert not isinstance(style.font_family, basestring), (
        'font_family should be a list')
    font_family, _family_bytes = unicode_to_char_p(
        ','.join(style.font_family))
    # Stored on the layout object so the char[] stays referenced.
    layout_obj.font_family = font_family

    pango.pango_font_description_set_family(font, font_family)
    pango.pango_font_description_set_variant(font, to_enum(style.font_variant))
    pango.pango_font_description_set_style(font, to_enum(style.font_style))
    pango.pango_font_description_set_stretch(font, to_enum(style.font_stretch))
    pango.pango_font_description_set_weight(font, style.font_weight)
    absolute_size = units_from_double(style.font_size)
    pango.pango_font_description_set_absolute_size(font, absolute_size)
    pango.pango_layout_set_font_description(layout, font)
    pango.pango_layout_set_wrap(layout, 'WRAP_WORD')
    layout_obj.set_text(text)

    # Make sure that max_width * Pango.SCALE == max_width * 1024 fits in a
    # signed integer. Treat bigger values same as None: unconstrained width.
    if max_width is not None and max_width < 2 ** 21:
        pango.pango_layout_set_width(layout, units_from_double(max_width))

    word_spacing = style.word_spacing
    letter_spacing = style.letter_spacing
    if letter_spacing == 'normal':
        letter_spacing = 0
    if not (text and (word_spacing != 0 or letter_spacing != 0)):
        # No text or no extra spacing: no attribute list is needed.
        return layout_obj

    letter_units = units_from_double(letter_spacing)
    space_units = units_from_double(word_spacing) + letter_units
    attr_list = pango.pango_attr_list_new()

    def insert_spacing(start, end, spacing):
        """Add a letter-spacing attribute for bytes ``start`` to ``end``."""
        attribute = pango.pango_attr_letter_spacing_new(spacing)
        attribute.start_index = start
        attribute.end_index = end
        pango.pango_attr_list_insert(attr_list, attribute)

    text_bytes = layout_obj.text_bytes
    # Letter spacing covers the whole text ...
    insert_spacing(0, len(text_bytes) + 1, letter_units)
    # ... and every space gets word spacing plus letter spacing.
    search_from = 0
    while True:
        position = text_bytes.find(b' ', search_from)
        if position == -1:
            break
        insert_spacing(position, position + 1, space_units)
        search_from = position + 1
    pango.pango_layout_set_attributes(layout, attr_list)
    pango.pango_attr_list_unref(attr_list)
    return layout_obj
def split_first_line(text, style, hinting, max_width, line_width):
    """Fit as much as possible in the available width for one line of text.

    The work is done in four steps: lay out a draft first line, return early
    when no further work is needed, try to pull the first word of the second
    line onto the first line, and finally try automatic hyphenation of that
    word with pyphen.

    ``line_width`` is accepted but not used in this function.

    Return ``(layout, length, resume_at, width, height, baseline)``.

    ``layout``: a pango Layout with the first line
    ``length``: length in UTF-8 bytes of the first line
    ``resume_at``: The number of UTF-8 bytes to skip for the next line.
                   May be ``None`` if the whole text fits in one line.
                   This may be greater than ``length`` in case of preserved
                   newline characters.
    ``width``: width in pixels of the first line
    ``height``: height in pixels of the first line
    ``baseline``: baseline in pixels of the first line

    """
    # Step #1: Get a draft layout with the first line
    layout = None
    if max_width:
        # Rough guess of how many characters may be needed for one line.
        expected_length = int(max_width / style.font_size * 2.5)
        if expected_length < len(text):
            # Try to use a small amount of text instead of the whole text
            layout = create_layout(
                text[:expected_length], style, hinting, max_width)
            lines = layout.iter_lines()
            first_line = next(lines, None)
            second_line = next(lines, None)
            if second_line is None:
                # The small amount of text fits in one line, give up and use
                # the whole text
                layout = None
    if layout is None:
        layout = create_layout(text, style, hinting, max_width)
        lines = layout.iter_lines()
        first_line = next(lines, None)
        second_line = next(lines, None)
    # Byte index where the second line starts, if there is a second line.
    resume_at = None if second_line is None else second_line.start_index

    # Step #2: Don't hyphenize when it's not needed
    if max_width is None:
        # The first line can take all the place needed
        return first_line_metrics(first_line, text, layout, resume_at)
    first_line_width, _height = get_size(first_line)
    if second_line is None and first_line_width <= max_width:
        # The first line fits in the available width
        return first_line_metrics(first_line, text, layout, resume_at)

    # Step #3: Try to put the first word of the second line on the first line
    if first_line_width <= max_width:
        # The first line may have been cut too early by Pango
        second_line_index = second_line.start_index
        first_part = utf8_slice(text, slice(second_line_index))
        second_part = utf8_slice(text, slice(second_line_index, None))
    else:
        # The first word is longer than the line, try to hyphenize it
        first_part = ''
        second_part = text
    # Everything up to the first space of the remaining text.
    next_word = second_part.split(' ', 1)[0]

    if not next_word:
        # We did not find a word on the next line
        return first_line_metrics(first_line, text, layout, resume_at)

    # next_word might fit without a space afterwards.
    # Pango previously counted that spaces advance width.
    new_first_line = first_part + next_word
    layout.set_text(new_first_line)
    lines = layout.iter_lines()
    first_line = next(lines, None)
    second_line = next(lines, None)
    first_line_width, _height = get_size(first_line)
    if second_line is None and first_line_width <= max_width:
        # The next word fits in the first line, keep the layout
        # The extra byte added to the length accounts for one more byte
        # after the word.
        resume_at = len(new_first_line.encode('utf-8')) + 1
        return first_line_metrics(first_line, text, layout, resume_at)

    # Step #4: Try to hyphenize
    hyphens = style.hyphens
    lang = style.lang
    if hyphens in ('none', 'manual') or lang not in pyphen.LANGUAGES:
        # No automatic hyphenation
        return first_line_metrics(first_line, text, layout, resume_at)

    first_line_width, _height = get_size(first_line)
    # Room left on the line; negative when the line overflows max_width.
    space = max_width - first_line_width
    if style.hyphenate_limit_zone.unit == '%':
        # A percentage is resolved against max_width.
        limit_zone = max_width * style.hyphenate_limit_zone.value / 100.
    else:
        limit_zone = style.hyphenate_limit_zone.value

    hyphenated = False
    if space > limit_zone or space < 0:
        # The next word does not fit, try hyphenation
        dictionary = PYPHEN_DICTIONARY_CACHE.get(lang)
        if dictionary is None:
            # Build the dictionary once per language and cache it.
            dictionary = pyphen.Pyphen(lang=lang)
            PYPHEN_DICTIONARY_CACHE[lang] = dictionary
        # Each candidate split is laid out in a temporary layout with the
        # hyphenate character appended to the first part of the word.
        for first_word_part, _ in dictionary.iterate(next_word):
            new_first_line = (
                first_part + first_word_part + style.hyphenate_character)
            temp_layout = create_layout(
                new_first_line, style, hinting, max_width)
            temp_lines = temp_layout.iter_lines()
            temp_first_line = next(temp_lines, None)
            temp_second_line = next(temp_lines, None)
            if (temp_second_line is None and space >= 0) or space < 0:
                hyphenated = True
                # TODO: find why there's no need to .encode
                # NOTE(review): this counts characters while the other
                # values of resume_at in this function count UTF-8 bytes;
                # confirm against the caller before changing it.
                resume_at = len(first_part + first_word_part)
                layout = temp_layout
                first_line = temp_first_line
                second_line = temp_second_line
                temp_first_line_width, _height = get_size(temp_first_line)
                if temp_first_line_width <= max_width:
                    # This split fits in the available width, stop here.
                    break

    return first_line_metrics(first_line, text, layout, resume_at, hyphenated)
2012-07-12 18:10:30 +04:00
def line_widths(box, enable_hinting, width, skip=None):
    """Yield the width of each line of the text of ``box``.

    The text is taken from index ``skip`` (0 when ``skip`` is ``None``),
    stripped of leading spaces, and laid out with ``width`` as the maximum
    width.

    """
    # TODO: without the lstrip, we get an extra empty line at the beginning. Is
    # there a better solution to avoid that?
    start = skip or 0
    remaining_text = box.text[start:].lstrip(' ')
    layout = create_layout(remaining_text, box.style, enable_hinting, width)
    for line in layout.iter_lines():
        current_width, _height = get_size(line)
        yield current_width
def show_first_line(cairo_context, pango_layout, hinting):
    """Draw the first line of ``pango_layout`` on ``cairo_context``.

    When ``hinting`` is true, the layout is first updated for the target
    context with ``pango_cairo_update_layout``.

    """
    context_pointer = ffi.cast('cairo_t *', cairo_context._pointer)
    if hinting:
        pangocairo.pango_cairo_update_layout(
            context_pointer, pango_layout.layout)
    first_line = next(pango_layout.iter_lines())
    pangocairo.pango_cairo_show_layout_line(context_pointer, first_line)