Port code to truncate strings to visual width to Go

This commit is contained in:
Kovid Goyal 2022-09-18 13:17:39 +05:30
parent d16ad40bbf
commit 8796168469
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 116 additions and 1 deletions

View File

@ -398,9 +398,10 @@ class TestDataTypes(BaseTest):
self.ae(tpl('a\U0001f337', 2), 1)
self.ae(tpl('a\U0001f337', 3), 2)
self.ae(tpl('a\U0001f337b', 4), 3)
self.ae(sanitize_title('a\0\01 \t\n\f\rb'), 'a b')
self.ae(tpl('a\x1b[31mbc', 2), 7)
self.ae(sanitize_title('a\0\01 \t\n\f\rb'), 'a b')
def tp(*data, leftover='', text='', csi='', apc='', ibp=False):
text_r, csi_r, apc_r, rest = [], [], [], []
left = ''

View File

@ -0,0 +1,93 @@
// License: GPLv3 Copyright: 2022, Kovid Goyal, <kovid at kovidgoyal.net>
package wcswidth
import (
"errors"
"fmt"
"kitty/tools/utils"
)
var _ = fmt.Print
// truncate_error describes a truncation point. It doubles as the error value
// returned from the rune handler to stop parsing once that point is known.
type truncate_error struct {
	pos   int // byte offset at which the text should be cut
	width int // visual width of the text before pos
}

// Error implements the error interface, reporting the truncation position
// and width.
func (self *truncate_error) Error() string {
	return fmt.Sprintf("Truncation at:%d with width:%d", self.pos, self.width)
}
// truncate_iterator tracks the byte position and visual width of text as it is
// parsed, in order to find where the text must be cut to fit within limit.
type truncate_iterator struct {
	// w accumulates the visual width and owns the parser whose callbacks
	// drive this iterator.
	w WCWidthIterator
	// pos is the number of input bytes accounted for so far.
	pos int
	// limit is the maximum visual width the truncated text may have.
	limit int
	// limit_exceeded_at is nil until a rune pushes the width past limit. It
	// then holds the position and width from just before that rune.
	limit_exceeded_at *truncate_error
}
// handle_csi advances the byte position past a CSI escape code. Escape codes
// contribute bytes to the position but no visual width.
func (self *truncate_iterator) handle_csi(body []byte) error {
	// NOTE(review): presumably the two bytes of the CSI introducer, which are
	// not part of body -- confirm against the parser.
	const non_body_bytes = 2
	self.pos += non_body_bytes + len(body)
	return nil
}
// handle_st_terminated_escape_code advances the byte position past an escape
// code that ends with a string terminator. Escape codes contribute bytes to
// the position but no visual width.
func (self *truncate_iterator) handle_st_terminated_escape_code(body []byte) error {
	// NOTE(review): presumably a two byte introducer plus a two byte
	// terminator around body. If the parser also accepts shorter terminators
	// (for example a single BEL byte) pos would be overcounted -- confirm
	// against the parser.
	const non_body_bytes = 4
	self.pos += non_body_bytes + len(body)
	return nil
}
// create_truncate_iterator returns a zeroed truncate_iterator whose embedded
// parser callbacks are bound to the iterator's own handlers.
func create_truncate_iterator() *truncate_iterator {
	ans := &truncate_iterator{}
	ans.w.parser.HandleRune = ans.handle_rune
	ans.w.parser.HandleCSI = ans.handle_csi
	// Every remaining escape code family is accounted for by the same handler.
	st_terminated := ans.handle_st_terminated_escape_code
	ans.w.parser.HandleOSC = st_terminated
	ans.w.parser.HandleAPC = st_terminated
	ans.w.parser.HandleDCS = st_terminated
	ans.w.parser.HandlePM = st_terminated
	ans.w.parser.HandleSOS = st_terminated
	return ans
}
// handle_rune feeds ch to the width iterator and tracks whether the width
// limit has been crossed. It returns nil while parsing should continue and a
// *truncate_error holding the final truncation point once that is known.
func (self *truncate_iterator) handle_rune(ch rune) error {
	// The width must be captured before ch is processed: it is the width to
	// report if ch turns out to be the rune that crosses the limit.
	width_before := self.w.current_width
	self.w.handle_rune(ch)
	width_after := self.w.current_width
	num_bytes := len(string(ch))

	if self.limit_exceeded_at == nil {
		if width_after > self.limit {
			// Remember the cut point but keep parsing for one more rune, since
			// that rune may bring the width back within the limit.
			self.limit_exceeded_at = &truncate_error{pos: self.pos, width: width_before}
		}
		self.pos += num_bytes
		return nil
	}

	// The previous rune crossed the limit.
	if width_after > self.limit {
		return self.limit_exceeded_at
	}
	// emoji variation selectors can cause width to decrease, in which case
	// the text up to and including ch fits after all.
	return &truncate_error{pos: self.pos + num_bytes, width: width_after}
}
// parse runs the parser over b and returns the byte offset at which b should
// be cut together with the visual width of the bytes before that offset. When
// the limit is never exceeded the offset is len(b).
func (self *truncate_iterator) parse(b []byte) (ans int, width int) {
	var te *truncate_error
	// A truncate_error from the parser carries the final truncation point.
	if err := self.w.parser.Parse(b); err != nil && errors.As(err, &te) {
		return te.pos, te.width
	}
	// The limit was crossed but the input ended before a handler could
	// report it.
	if exceeded := self.limit_exceeded_at; exceeded != nil {
		return exceeded.pos, exceeded.width
	}
	return len(b), self.w.current_width
}
// TruncateToVisualLengthWithWidth returns the longest prefix of text found to
// fit within a visual width of length, along with the visual width of that
// prefix. A length below one yields the empty string and a width of zero.
func TruncateToVisualLengthWithWidth(text string, length int) (truncated string, width_of_truncated int) {
	if length <= 0 {
		return "", 0
	}
	// A freshly created iterator starts with zero width and no recorded
	// truncation point, so only the limit needs to be set.
	it := create_truncate_iterator()
	it.limit = length
	end, width := it.parse(utils.UnsafeStringToBytes(text))
	return text[:end], width
}
// TruncateToVisualLength is TruncateToVisualLengthWithWidth without the width
// result: it returns only the truncated text.
func TruncateToVisualLength(text string, length int) string {
	truncated, _ := TruncateToVisualLengthWithWidth(text, length)
	return truncated
}

View File

@ -37,4 +37,25 @@ func TestWCSWidth(t *testing.T) {
// Flags individually and together
wcwidth("\U0001f1ee\U0001f1f3", 2, 2)
wcswidth("\U0001f1ee\U0001f1f3", 2)
truncate := func(text string, length int, expected string, expected_width int) {
actual, actual_width := TruncateToVisualLengthWithWidth(text, length)
if actual != expected {
t.Fatalf("Failed to truncate \"%s\" to %d\nExpected: %#v\nActual: %#v", text, length, expected, actual)
}
if actual_width != expected_width {
t.Fatalf("Failed to truncate with width \"%s\" to %d\nExpected: %d\nActual: %d", text, length, expected_width, actual_width)
}
}
truncate("abc", 4, "abc", 3)
truncate("abc", 3, "abc", 3)
truncate("abc", 2, "ab", 2)
truncate("abc", 0, "", 0)
truncate("a🌷", 2, "a", 1)
truncate("a🌷", 3, "a🌷", 3)
truncate("a🌷b", 3, "a🌷", 3)
truncate("a🌷b", 4, "a🌷b", 4)
truncate("a🌷\ufe0e", 2, "a🌷\ufe0e", 2)
truncate("a🌷\ufe0eb", 3, "a🌷\ufe0eb", 3)
truncate("a\x1b[31mb", 2, "a\x1b[31mb", 2)
}