1
1
mirror of https://github.com/wader/fq.git synced 2024-12-22 12:51:38 +03:00

decode: Use golang.org/x/text for text decoding

This commit is contained in:
Mattias Wadman 2021-09-06 01:08:03 +02:00
parent 85d17199de
commit a49e924d5f
7 changed files with 84 additions and 100 deletions

61
.vscode/settings.json vendored
View File

@ -1,45 +1,19 @@
{
"cSpell.words": [
"AACSSR",
"acsp",
"addrbase",
"APETAGEX",
"APEV",
"APIC",
"Arity",
"BCDU",
"bzip",
"CCIT",
"chzyer",
"CLICOLOR",
"CLIUNICODE",
"CLLID",
"Equalisation",
"Errorer",
"Exif",
"Exiter",
"FALLID",
"GOPATH",
"HEIC",
"HEVC",
"Horz",
"IDID",
"IEND",
"IHDR",
"Linearization",
"MJPEG",
"OSFS",
"Packetized",
"RDBs",
"RELA",
"SMPTE",
"TSSE",
"UIDID",
"UTCID",
"Uinteger",
"Unsychronized",
"WEBP",
"Xiph",
"acsp",
"addrbase",
"bzip",
"chzyer",
"coef",
"colorjson",
"cpus",
@ -50,8 +24,13 @@
"dumpaddr",
"elif",
"endians",
"Equalisation",
"Errorer",
"errorln",
"esds",
"Exif",
"Exiter",
"FALLID",
"fpbits",
"fqtest",
"ftyp",
@ -62,9 +41,16 @@
"gojq",
"gojqextra",
"golangci",
"GOPATH",
"gosec",
"gosimple",
"gsub",
"HEIC",
"HEVC",
"Horz",
"IDID",
"IEND",
"IHDR",
"iinf",
"iloc",
"ilst",
@ -78,12 +64,14 @@
"itchyny",
"ldflags",
"libavformat",
"Linearization",
"localizable",
"luma",
"matroska",
"mfhd",
"mfra",
"mfro",
"MJPEG",
"moof",
"moov",
"mvex",
@ -94,17 +82,23 @@
"nolint",
"obus",
"origptr",
"OSFS",
"Packetized",
"pcap",
"pmezard",
"println",
"protobuf",
"PSSH",
"randomised",
"RDBs",
"recoverfn",
"RELA",
"rpad",
"rtrimstr",
"sels",
"shquote",
"sidx",
"SMPTE",
"stco",
"stedolan",
"struct",
@ -129,10 +123,17 @@
"traf",
"trex",
"trun",
"TSSE",
"udta",
"ufffd",
"UIDID",
"Uinteger",
"unconvert",
"unparam",
"Unsychronized",
"UTCID",
"WEBP",
"Xiph",
"xrange"
],
"editor.formatOnSave": true,

View File

@ -6,7 +6,6 @@ package id3
// https://id3.org/id3v2-chapters-1.0
import (
"bytes"
"fmt"
"io"
"strings"
@ -14,6 +13,9 @@ import (
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/decode"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
)
var imageFormat []*decode.Format
@ -237,54 +239,30 @@ var encodingNames = map[uint64]string{
encodingUTF8: "UTF-8",
}
var encodingLen = map[uint64]int{
var encodingLen = map[uint64]int64{
encodingISO8859_1: 1,
encodingUTF16: 2,
encodingUTF16BE: 2,
encodingUTF8: 1,
}
var encodingToUTF8 = map[int]func(b []byte) string{
encodingISO8859_1: func(b []byte) string {
rs := make([]rune, len(b))
for i, r := range b {
rs[i] = rune(r)
}
return string(rs)
},
encodingUTF16: func(b []byte) string {
beBOM := []byte("\xfe\xff")
leBOM := []byte("\xff\xfe")
var rs []rune
switch {
case bytes.HasPrefix(b, leBOM):
// strip BOM
b = b[2:]
rs = make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
}
case bytes.HasPrefix(b, beBOM):
b = b[2:]
fallthrough
default:
rs = make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 | uint(b[i*2+1]))
}
}
return string(rs)
},
encodingUTF16BE: func(b []byte) string {
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
}
return string(rs)
},
encodingUTF8: func(b []byte) string {
return string(b)
},
// decodeToString decodes the bytes in b as text and returns the result as a
// string. The encoding is selected by e, which is compared against the
// encoding* constants; any value not listed below is decoded as UTF-8.
// The error from the decoder is discarded, so the caller gets whatever string
// the decoder returned.
func decodeToString(e int, b []byte) string {
var enc encoding.Encoding
switch e {
case encodingISO8859_1:
enc = charmap.ISO8859_1
case encodingUTF16:
// UseBOM: a leading byte order mark, if present, overrides the
// LittleEndian default passed here.
enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
case encodingUTF16BE:
// IgnoreBOM: byte order is fixed to big endian.
enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
default:
// covers encodingUTF8 and any unrecognized encoding value
enc = unicode.UTF8
}
// TODO: try decode?
// the decode error is dropped here; s is returned regardless
s, _ := enc.NewDecoder().String(string(b))
return s
}
func syncSafeU32(d *decode.D) uint64 {
@ -300,11 +278,7 @@ func syncSafeU32(d *decode.D) uint64 {
}
func text(d *decode.D, encoding int, nBytes int) string {
encodingFn := encodingToUTF8[encodingUTF8]
if fn, ok := encodingToUTF8[encoding]; ok {
encodingFn = fn
}
return strings.TrimRight(encodingFn(d.BytesLen(nBytes)), "\x00")
return strings.TrimRight(decodeToString(encoding, d.BytesLen(nBytes)), "\x00")
}
func textNull(d *decode.D, encoding int) string {
@ -313,11 +287,17 @@ func textNull(d *decode.D, encoding int) string {
nullLen = n
}
offset, _ := d.PeekFind(nullLen*8, 8, func(v uint64) bool { return v == 0 }, -1)
pos := offset / 8
textLen := int(pos + int64(nullLen))
text := text(d, encoding, textLen)
offset, _ := d.PeekFind(
int(nullLen)*8,
nullLen*8,
func(v uint64) bool { return v == 0 },
-1,
)
offsetBytes := offset / 8
text := text(d, encoding, int(offsetBytes))
d.SeekRel(nullLen * 8)
// seems sometimes utf16 etc has an extra null byte
if nullLen > 1 && d.PeekBits(8) == 0 {
d.SeekRel(8)
}

4
go.mod
View File

@ -11,6 +11,10 @@ require (
github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26
// fork of github.com/chzyer/readline
github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2
// bump: golang/text /golang\.org\/x\/text v(.*)/ git://github.com/golang/text|^0
// bump: golang/text command go get -d golang.org/x/text@v$LATEST && go mod tidy
golang.org/x/text v0.3.7
)
require (

3
go.sum
View File

@ -22,6 +22,9 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c=
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -4,6 +4,7 @@ import (
"io"
"github.com/wader/fq/pkg/bitio"
"golang.org/x/text/encoding/unicode"
)
// TODO: FP64,unsigned/BE/LE? rename SFP32?
@ -18,28 +19,19 @@ func (d *D) TryUTF8(nBytes int) (string, error) {
func (d *D) TryUTF16BE(nBytes int) (string, error) {
b, err := d.bitBuf.BytesLen(nBytes)
// TODO: len check
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
}
if err != nil {
return "", err
}
return string(rs), nil
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
}
func (d *D) TryUTF16LE(nBytes int) (string, error) {
b, err := d.bitBuf.BytesLen(nBytes)
// TODO: len check
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
}
if err != nil {
return "", err
}
return string(rs), nil
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
}
// TryUTF8ShortString reads a Pascal short string, max nBytes

View File

@ -17,6 +17,7 @@ def args_parse($args; $opts):
end;
def _parse_without_arg($new_args; $optname):
_parse($new_args; $flagmap; ($r | .parsed[$optname] = true));
# this is to support --arg=VALUE
( ($args[0] | index("=")) as $assign_i
| ( if $assign_i then $args[0][0:$assign_i]
else $args[0]

View File

@ -214,13 +214,16 @@ func (i *Interp) readline(c interface{}, a []interface{}) interface{} {
}
}
src, err := i.os.Readline(prompt, func(line string, pos int) (newLine []string, shared int) {
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
defer completeCtxCancelFn()
// TODO: err
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
return names, shared
})
src, err := i.os.Readline(
prompt,
func(line string, pos int) (newLine []string, shared int) {
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
defer completeCtxCancelFn()
// TODO: err
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
return names, shared
},
)
if errors.Is(err, ErrInterrupt) {
return valueError{"interrupt"}