1
1
mirror of https://github.com/wader/fq.git synced 2024-11-22 07:16:49 +03:00

decode: Use golang.org/x/text for text decoding

This commit is contained in:
Mattias Wadman 2021-09-06 01:08:03 +02:00
parent 85d17199de
commit a49e924d5f
7 changed files with 84 additions and 100 deletions

61
.vscode/settings.json vendored
View File

@ -1,45 +1,19 @@
{ {
"cSpell.words": [ "cSpell.words": [
"AACSSR", "AACSSR",
"acsp",
"addrbase",
"APETAGEX", "APETAGEX",
"APEV", "APEV",
"APIC", "APIC",
"Arity", "Arity",
"BCDU", "BCDU",
"bzip",
"CCIT", "CCIT",
"chzyer",
"CLICOLOR", "CLICOLOR",
"CLIUNICODE", "CLIUNICODE",
"CLLID", "CLLID",
"Equalisation",
"Errorer",
"Exif",
"Exiter",
"FALLID",
"GOPATH",
"HEIC",
"HEVC",
"Horz",
"IDID",
"IEND",
"IHDR",
"Linearization",
"MJPEG",
"OSFS",
"Packetized",
"RDBs",
"RELA",
"SMPTE",
"TSSE",
"UIDID",
"UTCID",
"Uinteger",
"Unsychronized",
"WEBP",
"Xiph",
"acsp",
"addrbase",
"bzip",
"chzyer",
"coef", "coef",
"colorjson", "colorjson",
"cpus", "cpus",
@ -50,8 +24,13 @@
"dumpaddr", "dumpaddr",
"elif", "elif",
"endians", "endians",
"Equalisation",
"Errorer",
"errorln", "errorln",
"esds", "esds",
"Exif",
"Exiter",
"FALLID",
"fpbits", "fpbits",
"fqtest", "fqtest",
"ftyp", "ftyp",
@ -62,9 +41,16 @@
"gojq", "gojq",
"gojqextra", "gojqextra",
"golangci", "golangci",
"GOPATH",
"gosec", "gosec",
"gosimple", "gosimple",
"gsub", "gsub",
"HEIC",
"HEVC",
"Horz",
"IDID",
"IEND",
"IHDR",
"iinf", "iinf",
"iloc", "iloc",
"ilst", "ilst",
@ -78,12 +64,14 @@
"itchyny", "itchyny",
"ldflags", "ldflags",
"libavformat", "libavformat",
"Linearization",
"localizable", "localizable",
"luma", "luma",
"matroska", "matroska",
"mfhd", "mfhd",
"mfra", "mfra",
"mfro", "mfro",
"MJPEG",
"moof", "moof",
"moov", "moov",
"mvex", "mvex",
@ -94,17 +82,23 @@
"nolint", "nolint",
"obus", "obus",
"origptr", "origptr",
"OSFS",
"Packetized",
"pcap", "pcap",
"pmezard", "pmezard",
"println", "println",
"protobuf", "protobuf",
"PSSH",
"randomised", "randomised",
"RDBs",
"recoverfn", "recoverfn",
"RELA",
"rpad", "rpad",
"rtrimstr", "rtrimstr",
"sels", "sels",
"shquote", "shquote",
"sidx", "sidx",
"SMPTE",
"stco", "stco",
"stedolan", "stedolan",
"struct", "struct",
@ -129,10 +123,17 @@
"traf", "traf",
"trex", "trex",
"trun", "trun",
"TSSE",
"udta", "udta",
"ufffd", "ufffd",
"UIDID",
"Uinteger",
"unconvert", "unconvert",
"unparam", "unparam",
"Unsychronized",
"UTCID",
"WEBP",
"Xiph",
"xrange" "xrange"
], ],
"editor.formatOnSave": true, "editor.formatOnSave": true,

View File

@ -6,7 +6,6 @@ package id3
// https://id3.org/id3v2-chapters-1.0 // https://id3.org/id3v2-chapters-1.0
import ( import (
"bytes"
"fmt" "fmt"
"io" "io"
"strings" "strings"
@ -14,6 +13,9 @@ import (
"github.com/wader/fq/format" "github.com/wader/fq/format"
"github.com/wader/fq/format/registry" "github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/decode" "github.com/wader/fq/pkg/decode"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
) )
var imageFormat []*decode.Format var imageFormat []*decode.Format
@ -237,54 +239,30 @@ var encodingNames = map[uint64]string{
encodingUTF8: "UTF-8", encodingUTF8: "UTF-8",
} }
var encodingLen = map[uint64]int{ var encodingLen = map[uint64]int64{
encodingISO8859_1: 1, encodingISO8859_1: 1,
encodingUTF16: 2, encodingUTF16: 2,
encodingUTF16BE: 2, encodingUTF16BE: 2,
encodingUTF8: 1, encodingUTF8: 1,
} }
var encodingToUTF8 = map[int]func(b []byte) string{ func decodeToString(e int, b []byte) string {
encodingISO8859_1: func(b []byte) string { var enc encoding.Encoding
rs := make([]rune, len(b))
for i, r := range b { switch e {
rs[i] = rune(r) case encodingISO8859_1:
} enc = charmap.ISO8859_1
return string(rs) case encodingUTF16:
}, enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
encodingUTF16: func(b []byte) string { case encodingUTF16BE:
beBOM := []byte("\xfe\xff") enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
leBOM := []byte("\xff\xfe") default:
var rs []rune enc = unicode.UTF8
switch { }
case bytes.HasPrefix(b, leBOM):
// strip BOM // TODO: try decode?
b = b[2:] s, _ := enc.NewDecoder().String(string(b))
rs = make([]rune, len(b)/2) return s
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
}
case bytes.HasPrefix(b, beBOM):
b = b[2:]
fallthrough
default:
rs = make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 | uint(b[i*2+1]))
}
}
return string(rs)
},
encodingUTF16BE: func(b []byte) string {
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
}
return string(rs)
},
encodingUTF8: func(b []byte) string {
return string(b)
},
} }
func syncSafeU32(d *decode.D) uint64 { func syncSafeU32(d *decode.D) uint64 {
@ -300,11 +278,7 @@ func syncSafeU32(d *decode.D) uint64 {
} }
func text(d *decode.D, encoding int, nBytes int) string { func text(d *decode.D, encoding int, nBytes int) string {
encodingFn := encodingToUTF8[encodingUTF8] return strings.TrimRight(decodeToString(encoding, d.BytesLen(nBytes)), "\x00")
if fn, ok := encodingToUTF8[encoding]; ok {
encodingFn = fn
}
return strings.TrimRight(encodingFn(d.BytesLen(nBytes)), "\x00")
} }
func textNull(d *decode.D, encoding int) string { func textNull(d *decode.D, encoding int) string {
@ -313,11 +287,17 @@ func textNull(d *decode.D, encoding int) string {
nullLen = n nullLen = n
} }
offset, _ := d.PeekFind(nullLen*8, 8, func(v uint64) bool { return v == 0 }, -1) offset, _ := d.PeekFind(
pos := offset / 8 int(nullLen)*8,
textLen := int(pos + int64(nullLen)) nullLen*8,
text := text(d, encoding, textLen) func(v uint64) bool { return v == 0 },
-1,
)
offsetBytes := offset / 8
text := text(d, encoding, int(offsetBytes))
d.SeekRel(nullLen * 8)
// seems sometimes utf16 etc has an extra null byte
if nullLen > 1 && d.PeekBits(8) == 0 { if nullLen > 1 && d.PeekBits(8) == 0 {
d.SeekRel(8) d.SeekRel(8)
} }

4
go.mod
View File

@ -11,6 +11,10 @@ require (
github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26 github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26
// fork of github.com/chzyer/readline // fork of github.com/chzyer/readline
github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2 github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2
// bump: golang/text /golang\.org\/x\/text v(.*)/ git://github.com/golang/text|^0
// bump: golang/text command go get -d golang.org/x/text@v$LATEST && go mod tidy
golang.org/x/text v0.3.7
) )
require ( require (

3
go.sum
View File

@ -22,6 +22,9 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c= golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c=
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -4,6 +4,7 @@ import (
"io" "io"
"github.com/wader/fq/pkg/bitio" "github.com/wader/fq/pkg/bitio"
"golang.org/x/text/encoding/unicode"
) )
// TODO: FP64,unsigned/BE/LE? rename SFP32? // TODO: FP64,unsigned/BE/LE? rename SFP32?
@ -18,28 +19,19 @@ func (d *D) TryUTF8(nBytes int) (string, error) {
func (d *D) TryUTF16BE(nBytes int) (string, error) { func (d *D) TryUTF16BE(nBytes int) (string, error) {
b, err := d.bitBuf.BytesLen(nBytes) b, err := d.bitBuf.BytesLen(nBytes)
// TODO: len check
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
}
if err != nil { if err != nil {
return "", err return "", err
} }
return string(rs), nil return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
} }
func (d *D) TryUTF16LE(nBytes int) (string, error) { func (d *D) TryUTF16LE(nBytes int) (string, error) {
b, err := d.bitBuf.BytesLen(nBytes) b, err := d.bitBuf.BytesLen(nBytes)
// TODO: len check // TODO: len check
rs := make([]rune, len(b)/2)
for i := 0; i < len(b)/2; i++ {
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
}
if err != nil { if err != nil {
return "", err return "", err
} }
return string(rs), nil return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
} }
// TryUTF8ShortString read pascal short string, max nBytes // TryUTF8ShortString read pascal short string, max nBytes

View File

@ -17,6 +17,7 @@ def args_parse($args; $opts):
end; end;
def _parse_without_arg($new_args; $optname): def _parse_without_arg($new_args; $optname):
_parse($new_args; $flagmap; ($r | .parsed[$optname] = true)); _parse($new_args; $flagmap; ($r | .parsed[$optname] = true));
# this is to support --arg=VALUE
( ($args[0] | index("=")) as $assign_i ( ($args[0] | index("=")) as $assign_i
| ( if $assign_i then $args[0][0:$assign_i] | ( if $assign_i then $args[0][0:$assign_i]
else $args[0] else $args[0]

View File

@ -214,13 +214,16 @@ func (i *Interp) readline(c interface{}, a []interface{}) interface{} {
} }
} }
src, err := i.os.Readline(prompt, func(line string, pos int) (newLine []string, shared int) { src, err := i.os.Readline(
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second) prompt,
defer completeCtxCancelFn() func(line string, pos int) (newLine []string, shared int) {
// TODO: err completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos) defer completeCtxCancelFn()
return names, shared // TODO: err
}) names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
return names, shared
},
)
if errors.Is(err, ErrInterrupt) { if errors.Is(err, ErrInterrupt) {
return valueError{"interrupt"} return valueError{"interrupt"}