Implemented CSS3 overflow-wrap property

Implemented the property as described by the W3C draft. There is still a bug with shaping characters, which do not keep their shapes when wrapped onto new lines. It is implemented as the overflow-wrap CSS property, but maybe it should be renamed to -weasy-overflow-wrap. Browser implementations are inconsistent, so there was no clear answer.
2024-10-05 00:21:15 +03:00 · 2013-09-30 14:16:03 -04:00 · 2013-09-30 14:16:03 -04:00 · f409ceeb25
commit f409ceeb25
parent 57b3dd5d07
5 changed files with 150 additions and 37 deletions
--- a/weasyprint/css/properties.py
+++ b/weasyprint/css/properties.py
@ -119,6 +119,7 @@ INITIAL_VALUES = {

    # CSS3 User Interface: http://www.w3.org/TR/css3-ui/#box-sizing
    'box_sizing': 'content-box',
+    'overflow_wrap': 'normal',

    # CSS3 Color: http://www.w3.org/TR/css3-color/#transparency
    'opacity': 1,
@ -210,6 +211,7 @@ INHERITED = set("""
    list_style_position
    list_style_type
    orphans
+    overflow_wrap
    quotes
    text_align
    text_decoration
--- a/weasyprint/css/validation.py
+++ b/weasyprint/css/validation.py
@ -1035,6 +1035,12 @@ def white_space(keyword):
    """``white-space`` property validation."""
    return keyword in ('normal', 'pre', 'nowrap', 'pre-wrap', 'pre-line')

+@validator()
+@single_keyword
+def overflow_wrap(keyword):
+    """``overflow-wrap`` property validation."""
+    return keyword in ('normal', 'break-word')
+

@validator(unprefixed=True)
@single_keyword
--- a/weasyprint/tests/test_css_validation.py
+++ b/weasyprint/tests/test_css_validation.py
@ -559,6 +559,16 @@ def test_linear_gradient():
    invalid('bottom left, blue')


+@assert_no_logs
+def test_overflow_wrap():
+    assert expand_to_dict('overflow-wrap: normal') == {
+        'overflow_wrap': 'normal'}
+    assert expand_to_dict('overflow-wrap: break-word') == {
+        'overflow_wrap': 'break-word'}
+    assert_invalid('overflow-wrap: none')
+    assert_invalid('overflow-wrap: normal, break-word')
+
+
@assert_no_logs
 def test_radial_gradient():
    red = (1, 0, 0, 1)
--- a/weasyprint/tests/test_layout.py
+++ b/weasyprint/tests/test_layout.py
@ -4705,6 +4705,52 @@ def test_hyphenate_limit_chars():
    assert line_count('auto 2') == 2


+@assert_no_logs
+def test_overflow_wrap():
+    def get_lines(wrap, text):
+        page, = parse('''
+            <style>
+                body {width: 30px; overflow: hidden; }
+                span {overflow-wrap: %s; white-space: normal; }
+            </style>
+            <body style="-weasy-hyphens: auto;" lang="en">
+                <span>%s
+        ''' % (wrap, text))
+        html, = page.children
+        body, = html.children
+        body_lines = [];
+        for line in body.children:
+            box, = line.children
+            textBox, = box.children
+            body_lines.append(textBox.text)
+            
+        return body_lines
+        
+    # break-word
+    lines = get_lines('break-word', 'aaaaaaaa')
+    assert len(lines) == 3
+    full_text = ''.join(line for line in lines)
+    assert full_text == 'aaaaaaaa'
+    
+    # normal
+    lines = get_lines('normal', 'aaaaaaaa')
+    assert len(lines) == 1
+    full_text = ''.join(line for line in lines)
+    assert full_text == 'aaaaaaaa'
+    
+    # break-word after hyphenation
+    lines = get_lines('break-word', 'hyphenation')
+    assert len(lines) == 5
+    full_text = ''.join(line for line in lines)
+    assert full_text == "hy\u2010phenation"
+    
+    # break word after normal white-space wrap and hyphenation
+    lines = get_lines('break-word', 'I am a splitted word.  I am an hyphenated word.')
+    assert len(lines) == 18
+    full_text = ''.join(line for line in lines)
+    assert full_text == "Iamasplittedword.Iamanhy\u2010phenatedword."
+
+
@assert_no_logs
 def test_white_space():
    """Test the white-space property."""
--- a/weasyprint/text.py
+++ b/weasyprint/text.py
@ -58,6 +58,12 @@ ffi.cdef('''
        PANGO_STRETCH_EXTRA_EXPANDED,
        PANGO_STRETCH_ULTRA_EXPANDED
    } PangoStretch;
+    
+    typedef enum {
+        PANGO_WRAP_WORD,
+        PANGO_WRAP_CHAR,
+        PANGO_WRAP_WORD_CHAR
+    } PangoWrapMode;

    typedef unsigned int guint;
    typedef int gint;
@ -98,6 +104,8 @@ ffi.cdef('''
        PangoLayout *layout, const char *text, int length);
    void pango_layout_set_font_description (
        PangoLayout *layout, const PangoFontDescription *desc);
+    void pango_layout_set_wrap (
+        PangoLayout *layout, PangoWrapMode wrap);


    PangoFontDescription * pango_font_description_new (void);
@ -227,6 +235,12 @@ PANGO_STRETCH = {
    'ultra-expanded': pango.PANGO_STRETCH_ULTRA_EXPANDED,
 }

+PANGO_WRAP_MODE = {
+    'WRAP_WORD' : pango.PANGO_WRAP_WORD,
+    'WRAP_CHAR' : pango.PANGO_WRAP_CHAR,
+    'WRAP_WORD_CHAR' : pango.PANGO_WRAP_WORD_CHAR
+}
+

 def utf8_slice(string, slice_):
    return string.encode('utf-8')[slice_].decode('utf-8')
@ -318,6 +332,9 @@ class Layout(object):
    def get_font_metrics(self):
        context = pango.pango_layout_get_context(self.layout)
        return FontMetrics(context, self.font)
+    
+    def set_wrap(self, wrap_mode):
+        pango.pango_layout_set_wrap(self.layout, wrap_mode)


 class FontMetrics(object):
@ -470,46 +487,78 @@ def split_first_line(text, style, hinting, max_width, line_width):
    hyphens = style.hyphens
    lang = style.lang
    total, left, right = style.hyphenate_limit_chars
-    if hyphens in ('none', 'manual') or lang not in pyphen.LANGUAGES:
-        # No automatic hyphenation
-        return first_line_metrics(first_line, text, layout, resume_at)
-    elif len(next_word) < total:
-        # Next word is too small
-        return first_line_metrics(first_line, text, layout, resume_at)
-
+    hyphenated = False
+    
+    # Automatic hyphenation possible and next word is long enough  
+    if hyphens not in ('none', 'manual') and lang in pyphen.LANGUAGES and len(next_word) >= total:
+        first_line_width, _height = get_size(first_line)
+        space = max_width - first_line_width
+        if style.hyphenate_limit_zone.unit == '%':
+            limit_zone = max_width * style.hyphenate_limit_zone.value / 100.
+        else:
+            limit_zone = style.hyphenate_limit_zone.value
+    
+        if space > limit_zone or space < 0:
+            # The next word does not fit, try hyphenation
+            dictionary_key = (lang, left, right, total)
+            dictionary = PYPHEN_DICTIONARY_CACHE.get(dictionary_key)
+            if dictionary is None:
+                dictionary = pyphen.Pyphen(lang=lang, left=left, right=right)
+                PYPHEN_DICTIONARY_CACHE[dictionary_key] = dictionary
+            for first_word_part, _ in dictionary.iterate(next_word):
+                new_first_line = (
+                    first_part + first_word_part + style.hyphenate_character)
+                temp_layout = create_layout(
+                    new_first_line, style, hinting, max_width)
+                temp_lines = temp_layout.iter_lines()
+                temp_first_line = next(temp_lines, None)
+                temp_second_line = next(temp_lines, None)
+                
+                if (temp_second_line is None and space >= 0) or space < 0:
+                    hyphenated = True
+                    # TODO: find why there's no need to .encode
+                    resume_at = len(first_part + first_word_part)
+                    layout = temp_layout
+                    first_line = temp_first_line
+                    second_line = temp_second_line
+                    temp_first_line_width, _height = get_size(temp_first_line)
+                    if temp_first_line_width <= max_width:
+                        break
+    
+    # Step 5: Try to break word if it's too long for the line
+    overflow_wrap = style.overflow_wrap
    first_line_width, _height = get_size(first_line)
    space = max_width - first_line_width
-    if style.hyphenate_limit_zone.unit == '%':
-        limit_zone = max_width * style.hyphenate_limit_zone.value / 100.
-    else:
-        limit_zone = style.hyphenate_limit_zone.value
+    # If we can break words and the first line is too long
+    if overflow_wrap == 'break-word' and space < 0:
+        if hyphenated:
+            # Is it really OK to remove the hyphenation when overflow-wrap breaks the word?
+            new_first_line = (new_first_line.rstrip(new_first_line[-(len(style.hyphenate_character)):]))
+            if second_line is not None:
+                second_line_index = second_line.start_index
+                second_part = utf8_slice(text, slice(second_line_index, None))
+                new_first_line += second_part
+                
+            hyphenated = False
+        
+        # TODO: Modify the code to satisfy the W3C requirement:
+        # "Shaping characters are still shaped as if the word were not broken"
+        # Lines are processed one at a time in this function, with no memory of the
+        # previous one, which prevents shaping characters (Arabic, for instance) from
+        # keeping their shape when wrapped onto the next line by the Pango layout.
+        # Maybe insert Unicode shaping characters into the text?
+        temp_layout = create_layout(new_first_line, style, hinting, max_width)
+        temp_layout.set_wrap(PANGO_WRAP_MODE['WRAP_WORD_CHAR'])
+        temp_lines = temp_layout.iter_lines()
+        temp_first_line = next(temp_lines, None)
+        temp_second_line = next(temp_lines, None)
+        temp_second_line_index = len(new_first_line) if temp_second_line is None else temp_second_line.start_index
+        resume_at = temp_second_line_index
+        first_part = utf8_slice(text, slice(temp_second_line_index))
+        layout = create_layout(first_part, style, hinting, max_width)
+        lines = layout.iter_lines()
+        first_line = next(lines, None)

-    hyphenated = False
-    if space > limit_zone or space < 0:
-        # The next word does not fit, try hyphenation
-        dictionary_key = (lang, left, right, total)
-        dictionary = PYPHEN_DICTIONARY_CACHE.get(dictionary_key)
-        if dictionary is None:
-            dictionary = pyphen.Pyphen(lang=lang, left=left, right=right)
-            PYPHEN_DICTIONARY_CACHE[dictionary_key] = dictionary
-        for first_word_part, _ in dictionary.iterate(next_word):
-            new_first_line = (
-                first_part + first_word_part + style.hyphenate_character)
-            temp_layout = create_layout(
-                new_first_line, style, hinting, max_width)
-            temp_lines = temp_layout.iter_lines()
-            temp_first_line = next(temp_lines, None)
-            temp_second_line = next(temp_lines, None)
-            if (temp_second_line is None and space >= 0) or space < 0:
-                hyphenated = True
-                # TODO: find why there's no need to .encode
-                resume_at = len(first_part + first_word_part)
-                layout = temp_layout
-                first_line = temp_first_line
-                second_line = temp_second_line
-                temp_first_line_width, _height = get_size(temp_first_line)
-                if temp_first_line_width <= max_width:
-                    break
    return first_line_metrics(first_line, text, layout, resume_at, hyphenated)