mirror of
https://github.com/wader/fq.git
synced 2024-11-22 07:16:49 +03:00
decode: Use golang.org/x/text for text decoding
This commit is contained in:
parent
85d17199de
commit
a49e924d5f
61
.vscode/settings.json
vendored
61
.vscode/settings.json
vendored
@ -1,45 +1,19 @@
|
||||
{
|
||||
"cSpell.words": [
|
||||
"AACSSR",
|
||||
"acsp",
|
||||
"addrbase",
|
||||
"APETAGEX",
|
||||
"APEV",
|
||||
"APIC",
|
||||
"Arity",
|
||||
"BCDU",
|
||||
"bzip",
|
||||
"CCIT",
|
||||
"chzyer",
|
||||
"CLICOLOR",
|
||||
"CLIUNICODE",
|
||||
"CLLID",
|
||||
"Equalisation",
|
||||
"Errorer",
|
||||
"Exif",
|
||||
"Exiter",
|
||||
"FALLID",
|
||||
"GOPATH",
|
||||
"HEIC",
|
||||
"HEVC",
|
||||
"Horz",
|
||||
"IDID",
|
||||
"IEND",
|
||||
"IHDR",
|
||||
"Linearization",
|
||||
"MJPEG",
|
||||
"OSFS",
|
||||
"Packetized",
|
||||
"RDBs",
|
||||
"RELA",
|
||||
"SMPTE",
|
||||
"TSSE",
|
||||
"UIDID",
|
||||
"UTCID",
|
||||
"Uinteger",
|
||||
"Unsychronized",
|
||||
"WEBP",
|
||||
"Xiph",
|
||||
"acsp",
|
||||
"addrbase",
|
||||
"bzip",
|
||||
"chzyer",
|
||||
"coef",
|
||||
"colorjson",
|
||||
"cpus",
|
||||
@ -50,8 +24,13 @@
|
||||
"dumpaddr",
|
||||
"elif",
|
||||
"endians",
|
||||
"Equalisation",
|
||||
"Errorer",
|
||||
"errorln",
|
||||
"esds",
|
||||
"Exif",
|
||||
"Exiter",
|
||||
"FALLID",
|
||||
"fpbits",
|
||||
"fqtest",
|
||||
"ftyp",
|
||||
@ -62,9 +41,16 @@
|
||||
"gojq",
|
||||
"gojqextra",
|
||||
"golangci",
|
||||
"GOPATH",
|
||||
"gosec",
|
||||
"gosimple",
|
||||
"gsub",
|
||||
"HEIC",
|
||||
"HEVC",
|
||||
"Horz",
|
||||
"IDID",
|
||||
"IEND",
|
||||
"IHDR",
|
||||
"iinf",
|
||||
"iloc",
|
||||
"ilst",
|
||||
@ -78,12 +64,14 @@
|
||||
"itchyny",
|
||||
"ldflags",
|
||||
"libavformat",
|
||||
"Linearization",
|
||||
"localizable",
|
||||
"luma",
|
||||
"matroska",
|
||||
"mfhd",
|
||||
"mfra",
|
||||
"mfro",
|
||||
"MJPEG",
|
||||
"moof",
|
||||
"moov",
|
||||
"mvex",
|
||||
@ -94,17 +82,23 @@
|
||||
"nolint",
|
||||
"obus",
|
||||
"origptr",
|
||||
"OSFS",
|
||||
"Packetized",
|
||||
"pcap",
|
||||
"pmezard",
|
||||
"println",
|
||||
"protobuf",
|
||||
"PSSH",
|
||||
"randomised",
|
||||
"RDBs",
|
||||
"recoverfn",
|
||||
"RELA",
|
||||
"rpad",
|
||||
"rtrimstr",
|
||||
"sels",
|
||||
"shquote",
|
||||
"sidx",
|
||||
"SMPTE",
|
||||
"stco",
|
||||
"stedolan",
|
||||
"struct",
|
||||
@ -129,10 +123,17 @@
|
||||
"traf",
|
||||
"trex",
|
||||
"trun",
|
||||
"TSSE",
|
||||
"udta",
|
||||
"ufffd",
|
||||
"UIDID",
|
||||
"Uinteger",
|
||||
"unconvert",
|
||||
"unparam",
|
||||
"Unsychronized",
|
||||
"UTCID",
|
||||
"WEBP",
|
||||
"Xiph",
|
||||
"xrange"
|
||||
],
|
||||
"editor.formatOnSave": true,
|
||||
|
@ -6,7 +6,6 @@ package id3
|
||||
// https://id3.org/id3v2-chapters-1.0
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
@ -14,6 +13,9 @@ import (
|
||||
"github.com/wader/fq/format"
|
||||
"github.com/wader/fq/format/registry"
|
||||
"github.com/wader/fq/pkg/decode"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
var imageFormat []*decode.Format
|
||||
@ -237,54 +239,30 @@ var encodingNames = map[uint64]string{
|
||||
encodingUTF8: "UTF-8",
|
||||
}
|
||||
|
||||
var encodingLen = map[uint64]int{
|
||||
var encodingLen = map[uint64]int64{
|
||||
encodingISO8859_1: 1,
|
||||
encodingUTF16: 2,
|
||||
encodingUTF16BE: 2,
|
||||
encodingUTF8: 1,
|
||||
}
|
||||
|
||||
var encodingToUTF8 = map[int]func(b []byte) string{
|
||||
encodingISO8859_1: func(b []byte) string {
|
||||
rs := make([]rune, len(b))
|
||||
for i, r := range b {
|
||||
rs[i] = rune(r)
|
||||
}
|
||||
return string(rs)
|
||||
},
|
||||
encodingUTF16: func(b []byte) string {
|
||||
beBOM := []byte("\xfe\xff")
|
||||
leBOM := []byte("\xff\xfe")
|
||||
var rs []rune
|
||||
switch {
|
||||
case bytes.HasPrefix(b, leBOM):
|
||||
// strip BOM
|
||||
b = b[2:]
|
||||
rs = make([]rune, len(b)/2)
|
||||
for i := 0; i < len(b)/2; i++ {
|
||||
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
|
||||
}
|
||||
case bytes.HasPrefix(b, beBOM):
|
||||
b = b[2:]
|
||||
fallthrough
|
||||
func decodeToString(e int, b []byte) string {
|
||||
var enc encoding.Encoding
|
||||
|
||||
switch e {
|
||||
case encodingISO8859_1:
|
||||
enc = charmap.ISO8859_1
|
||||
case encodingUTF16:
|
||||
enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
|
||||
case encodingUTF16BE:
|
||||
enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
|
||||
default:
|
||||
rs = make([]rune, len(b)/2)
|
||||
for i := 0; i < len(b)/2; i++ {
|
||||
rs[i] = rune(uint(b[i*2])<<8 | uint(b[i*2+1]))
|
||||
enc = unicode.UTF8
|
||||
}
|
||||
}
|
||||
return string(rs)
|
||||
},
|
||||
encodingUTF16BE: func(b []byte) string {
|
||||
rs := make([]rune, len(b)/2)
|
||||
for i := 0; i < len(b)/2; i++ {
|
||||
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
|
||||
}
|
||||
return string(rs)
|
||||
},
|
||||
encodingUTF8: func(b []byte) string {
|
||||
return string(b)
|
||||
},
|
||||
|
||||
// TODO: try decode?
|
||||
s, _ := enc.NewDecoder().String(string(b))
|
||||
return s
|
||||
}
|
||||
|
||||
func syncSafeU32(d *decode.D) uint64 {
|
||||
@ -300,11 +278,7 @@ func syncSafeU32(d *decode.D) uint64 {
|
||||
}
|
||||
|
||||
func text(d *decode.D, encoding int, nBytes int) string {
|
||||
encodingFn := encodingToUTF8[encodingUTF8]
|
||||
if fn, ok := encodingToUTF8[encoding]; ok {
|
||||
encodingFn = fn
|
||||
}
|
||||
return strings.TrimRight(encodingFn(d.BytesLen(nBytes)), "\x00")
|
||||
return strings.TrimRight(decodeToString(encoding, d.BytesLen(nBytes)), "\x00")
|
||||
}
|
||||
|
||||
func textNull(d *decode.D, encoding int) string {
|
||||
@ -313,11 +287,17 @@ func textNull(d *decode.D, encoding int) string {
|
||||
nullLen = n
|
||||
}
|
||||
|
||||
offset, _ := d.PeekFind(nullLen*8, 8, func(v uint64) bool { return v == 0 }, -1)
|
||||
pos := offset / 8
|
||||
textLen := int(pos + int64(nullLen))
|
||||
text := text(d, encoding, textLen)
|
||||
offset, _ := d.PeekFind(
|
||||
int(nullLen)*8,
|
||||
nullLen*8,
|
||||
func(v uint64) bool { return v == 0 },
|
||||
-1,
|
||||
)
|
||||
offsetBytes := offset / 8
|
||||
text := text(d, encoding, int(offsetBytes))
|
||||
|
||||
d.SeekRel(nullLen * 8)
|
||||
// seems sometimes utf16 etc has an extra null byte
|
||||
if nullLen > 1 && d.PeekBits(8) == 0 {
|
||||
d.SeekRel(8)
|
||||
}
|
||||
|
4
go.mod
4
go.mod
@ -11,6 +11,10 @@ require (
|
||||
github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26
|
||||
// fork of github.com/chzyer/readline
|
||||
github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2
|
||||
|
||||
// bump: golang/text /golang\.org\/x\/text v(.*)/ git://github.com/golang/text|^0
|
||||
// bump: golang/text command go get -d golang.org/x/text@v$LATEST && go mod tidy
|
||||
golang.org/x/text v0.3.7
|
||||
)
|
||||
|
||||
require (
|
||||
|
3
go.sum
3
go.sum
@ -22,6 +22,9 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c=
|
||||
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"io"
|
||||
|
||||
"github.com/wader/fq/pkg/bitio"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// TODO: FP64,unsigned/BE/LE? rename SFP32?
|
||||
@ -18,28 +19,19 @@ func (d *D) TryUTF8(nBytes int) (string, error) {
|
||||
|
||||
func (d *D) TryUTF16BE(nBytes int) (string, error) {
|
||||
b, err := d.bitBuf.BytesLen(nBytes)
|
||||
// TODO: len check
|
||||
rs := make([]rune, len(b)/2)
|
||||
for i := 0; i < len(b)/2; i++ {
|
||||
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
|
||||
}
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(rs), nil
|
||||
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
|
||||
}
|
||||
|
||||
func (d *D) TryUTF16LE(nBytes int) (string, error) {
|
||||
b, err := d.bitBuf.BytesLen(nBytes)
|
||||
// TODO: len check
|
||||
rs := make([]rune, len(b)/2)
|
||||
for i := 0; i < len(b)/2; i++ {
|
||||
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
|
||||
}
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(rs), nil
|
||||
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
|
||||
}
|
||||
|
||||
// TryUTF8ShortString read pascal short string, max nBytes
|
||||
|
@ -17,6 +17,7 @@ def args_parse($args; $opts):
|
||||
end;
|
||||
def _parse_without_arg($new_args; $optname):
|
||||
_parse($new_args; $flagmap; ($r | .parsed[$optname] = true));
|
||||
# this is to support --arg=VALUE
|
||||
( ($args[0] | index("=")) as $assign_i
|
||||
| ( if $assign_i then $args[0][0:$assign_i]
|
||||
else $args[0]
|
||||
|
@ -214,13 +214,16 @@ func (i *Interp) readline(c interface{}, a []interface{}) interface{} {
|
||||
}
|
||||
}
|
||||
|
||||
src, err := i.os.Readline(prompt, func(line string, pos int) (newLine []string, shared int) {
|
||||
src, err := i.os.Readline(
|
||||
prompt,
|
||||
func(line string, pos int) (newLine []string, shared int) {
|
||||
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
|
||||
defer completeCtxCancelFn()
|
||||
// TODO: err
|
||||
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
|
||||
return names, shared
|
||||
})
|
||||
},
|
||||
)
|
||||
|
||||
if errors.Is(err, ErrInterrupt) {
|
||||
return valueError{"interrupt"}
|
||||
|
Loading…
Reference in New Issue
Block a user