mirror of
https://github.com/wader/fq.git
synced 2024-11-22 07:16:49 +03:00
decode: Use golang.org/x/text for text decoding
This commit is contained in:
parent
85d17199de
commit
a49e924d5f
61
.vscode/settings.json
vendored
61
.vscode/settings.json
vendored
@ -1,45 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cSpell.words": [
|
"cSpell.words": [
|
||||||
"AACSSR",
|
"AACSSR",
|
||||||
|
"acsp",
|
||||||
|
"addrbase",
|
||||||
"APETAGEX",
|
"APETAGEX",
|
||||||
"APEV",
|
"APEV",
|
||||||
"APIC",
|
"APIC",
|
||||||
"Arity",
|
"Arity",
|
||||||
"BCDU",
|
"BCDU",
|
||||||
|
"bzip",
|
||||||
"CCIT",
|
"CCIT",
|
||||||
|
"chzyer",
|
||||||
"CLICOLOR",
|
"CLICOLOR",
|
||||||
"CLIUNICODE",
|
"CLIUNICODE",
|
||||||
"CLLID",
|
"CLLID",
|
||||||
"Equalisation",
|
|
||||||
"Errorer",
|
|
||||||
"Exif",
|
|
||||||
"Exiter",
|
|
||||||
"FALLID",
|
|
||||||
"GOPATH",
|
|
||||||
"HEIC",
|
|
||||||
"HEVC",
|
|
||||||
"Horz",
|
|
||||||
"IDID",
|
|
||||||
"IEND",
|
|
||||||
"IHDR",
|
|
||||||
"Linearization",
|
|
||||||
"MJPEG",
|
|
||||||
"OSFS",
|
|
||||||
"Packetized",
|
|
||||||
"RDBs",
|
|
||||||
"RELA",
|
|
||||||
"SMPTE",
|
|
||||||
"TSSE",
|
|
||||||
"UIDID",
|
|
||||||
"UTCID",
|
|
||||||
"Uinteger",
|
|
||||||
"Unsychronized",
|
|
||||||
"WEBP",
|
|
||||||
"Xiph",
|
|
||||||
"acsp",
|
|
||||||
"addrbase",
|
|
||||||
"bzip",
|
|
||||||
"chzyer",
|
|
||||||
"coef",
|
"coef",
|
||||||
"colorjson",
|
"colorjson",
|
||||||
"cpus",
|
"cpus",
|
||||||
@ -50,8 +24,13 @@
|
|||||||
"dumpaddr",
|
"dumpaddr",
|
||||||
"elif",
|
"elif",
|
||||||
"endians",
|
"endians",
|
||||||
|
"Equalisation",
|
||||||
|
"Errorer",
|
||||||
"errorln",
|
"errorln",
|
||||||
"esds",
|
"esds",
|
||||||
|
"Exif",
|
||||||
|
"Exiter",
|
||||||
|
"FALLID",
|
||||||
"fpbits",
|
"fpbits",
|
||||||
"fqtest",
|
"fqtest",
|
||||||
"ftyp",
|
"ftyp",
|
||||||
@ -62,9 +41,16 @@
|
|||||||
"gojq",
|
"gojq",
|
||||||
"gojqextra",
|
"gojqextra",
|
||||||
"golangci",
|
"golangci",
|
||||||
|
"GOPATH",
|
||||||
"gosec",
|
"gosec",
|
||||||
"gosimple",
|
"gosimple",
|
||||||
"gsub",
|
"gsub",
|
||||||
|
"HEIC",
|
||||||
|
"HEVC",
|
||||||
|
"Horz",
|
||||||
|
"IDID",
|
||||||
|
"IEND",
|
||||||
|
"IHDR",
|
||||||
"iinf",
|
"iinf",
|
||||||
"iloc",
|
"iloc",
|
||||||
"ilst",
|
"ilst",
|
||||||
@ -78,12 +64,14 @@
|
|||||||
"itchyny",
|
"itchyny",
|
||||||
"ldflags",
|
"ldflags",
|
||||||
"libavformat",
|
"libavformat",
|
||||||
|
"Linearization",
|
||||||
"localizable",
|
"localizable",
|
||||||
"luma",
|
"luma",
|
||||||
"matroska",
|
"matroska",
|
||||||
"mfhd",
|
"mfhd",
|
||||||
"mfra",
|
"mfra",
|
||||||
"mfro",
|
"mfro",
|
||||||
|
"MJPEG",
|
||||||
"moof",
|
"moof",
|
||||||
"moov",
|
"moov",
|
||||||
"mvex",
|
"mvex",
|
||||||
@ -94,17 +82,23 @@
|
|||||||
"nolint",
|
"nolint",
|
||||||
"obus",
|
"obus",
|
||||||
"origptr",
|
"origptr",
|
||||||
|
"OSFS",
|
||||||
|
"Packetized",
|
||||||
"pcap",
|
"pcap",
|
||||||
"pmezard",
|
"pmezard",
|
||||||
"println",
|
"println",
|
||||||
"protobuf",
|
"protobuf",
|
||||||
|
"PSSH",
|
||||||
"randomised",
|
"randomised",
|
||||||
|
"RDBs",
|
||||||
"recoverfn",
|
"recoverfn",
|
||||||
|
"RELA",
|
||||||
"rpad",
|
"rpad",
|
||||||
"rtrimstr",
|
"rtrimstr",
|
||||||
"sels",
|
"sels",
|
||||||
"shquote",
|
"shquote",
|
||||||
"sidx",
|
"sidx",
|
||||||
|
"SMPTE",
|
||||||
"stco",
|
"stco",
|
||||||
"stedolan",
|
"stedolan",
|
||||||
"struct",
|
"struct",
|
||||||
@ -129,10 +123,17 @@
|
|||||||
"traf",
|
"traf",
|
||||||
"trex",
|
"trex",
|
||||||
"trun",
|
"trun",
|
||||||
|
"TSSE",
|
||||||
"udta",
|
"udta",
|
||||||
"ufffd",
|
"ufffd",
|
||||||
|
"UIDID",
|
||||||
|
"Uinteger",
|
||||||
"unconvert",
|
"unconvert",
|
||||||
"unparam",
|
"unparam",
|
||||||
|
"Unsychronized",
|
||||||
|
"UTCID",
|
||||||
|
"WEBP",
|
||||||
|
"Xiph",
|
||||||
"xrange"
|
"xrange"
|
||||||
],
|
],
|
||||||
"editor.formatOnSave": true,
|
"editor.formatOnSave": true,
|
||||||
|
@ -6,7 +6,6 @@ package id3
|
|||||||
// https://id3.org/id3v2-chapters-1.0
|
// https://id3.org/id3v2-chapters-1.0
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
@ -14,6 +13,9 @@ import (
|
|||||||
"github.com/wader/fq/format"
|
"github.com/wader/fq/format"
|
||||||
"github.com/wader/fq/format/registry"
|
"github.com/wader/fq/format/registry"
|
||||||
"github.com/wader/fq/pkg/decode"
|
"github.com/wader/fq/pkg/decode"
|
||||||
|
"golang.org/x/text/encoding"
|
||||||
|
"golang.org/x/text/encoding/charmap"
|
||||||
|
"golang.org/x/text/encoding/unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var imageFormat []*decode.Format
|
var imageFormat []*decode.Format
|
||||||
@ -237,54 +239,30 @@ var encodingNames = map[uint64]string{
|
|||||||
encodingUTF8: "UTF-8",
|
encodingUTF8: "UTF-8",
|
||||||
}
|
}
|
||||||
|
|
||||||
var encodingLen = map[uint64]int{
|
var encodingLen = map[uint64]int64{
|
||||||
encodingISO8859_1: 1,
|
encodingISO8859_1: 1,
|
||||||
encodingUTF16: 2,
|
encodingUTF16: 2,
|
||||||
encodingUTF16BE: 2,
|
encodingUTF16BE: 2,
|
||||||
encodingUTF8: 1,
|
encodingUTF8: 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
var encodingToUTF8 = map[int]func(b []byte) string{
|
func decodeToString(e int, b []byte) string {
|
||||||
encodingISO8859_1: func(b []byte) string {
|
var enc encoding.Encoding
|
||||||
rs := make([]rune, len(b))
|
|
||||||
for i, r := range b {
|
switch e {
|
||||||
rs[i] = rune(r)
|
case encodingISO8859_1:
|
||||||
}
|
enc = charmap.ISO8859_1
|
||||||
return string(rs)
|
case encodingUTF16:
|
||||||
},
|
enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
|
||||||
encodingUTF16: func(b []byte) string {
|
case encodingUTF16BE:
|
||||||
beBOM := []byte("\xfe\xff")
|
enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
|
||||||
leBOM := []byte("\xff\xfe")
|
default:
|
||||||
var rs []rune
|
enc = unicode.UTF8
|
||||||
switch {
|
}
|
||||||
case bytes.HasPrefix(b, leBOM):
|
|
||||||
// strip BOM
|
// TODO: try decode?
|
||||||
b = b[2:]
|
s, _ := enc.NewDecoder().String(string(b))
|
||||||
rs = make([]rune, len(b)/2)
|
return s
|
||||||
for i := 0; i < len(b)/2; i++ {
|
|
||||||
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
|
|
||||||
}
|
|
||||||
case bytes.HasPrefix(b, beBOM):
|
|
||||||
b = b[2:]
|
|
||||||
fallthrough
|
|
||||||
default:
|
|
||||||
rs = make([]rune, len(b)/2)
|
|
||||||
for i := 0; i < len(b)/2; i++ {
|
|
||||||
rs[i] = rune(uint(b[i*2])<<8 | uint(b[i*2+1]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return string(rs)
|
|
||||||
},
|
|
||||||
encodingUTF16BE: func(b []byte) string {
|
|
||||||
rs := make([]rune, len(b)/2)
|
|
||||||
for i := 0; i < len(b)/2; i++ {
|
|
||||||
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
|
|
||||||
}
|
|
||||||
return string(rs)
|
|
||||||
},
|
|
||||||
encodingUTF8: func(b []byte) string {
|
|
||||||
return string(b)
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func syncSafeU32(d *decode.D) uint64 {
|
func syncSafeU32(d *decode.D) uint64 {
|
||||||
@ -300,11 +278,7 @@ func syncSafeU32(d *decode.D) uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func text(d *decode.D, encoding int, nBytes int) string {
|
func text(d *decode.D, encoding int, nBytes int) string {
|
||||||
encodingFn := encodingToUTF8[encodingUTF8]
|
return strings.TrimRight(decodeToString(encoding, d.BytesLen(nBytes)), "\x00")
|
||||||
if fn, ok := encodingToUTF8[encoding]; ok {
|
|
||||||
encodingFn = fn
|
|
||||||
}
|
|
||||||
return strings.TrimRight(encodingFn(d.BytesLen(nBytes)), "\x00")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func textNull(d *decode.D, encoding int) string {
|
func textNull(d *decode.D, encoding int) string {
|
||||||
@ -313,11 +287,17 @@ func textNull(d *decode.D, encoding int) string {
|
|||||||
nullLen = n
|
nullLen = n
|
||||||
}
|
}
|
||||||
|
|
||||||
offset, _ := d.PeekFind(nullLen*8, 8, func(v uint64) bool { return v == 0 }, -1)
|
offset, _ := d.PeekFind(
|
||||||
pos := offset / 8
|
int(nullLen)*8,
|
||||||
textLen := int(pos + int64(nullLen))
|
nullLen*8,
|
||||||
text := text(d, encoding, textLen)
|
func(v uint64) bool { return v == 0 },
|
||||||
|
-1,
|
||||||
|
)
|
||||||
|
offsetBytes := offset / 8
|
||||||
|
text := text(d, encoding, int(offsetBytes))
|
||||||
|
|
||||||
|
d.SeekRel(nullLen * 8)
|
||||||
|
// seems sometimes utf16 etc has an extra null byte
|
||||||
if nullLen > 1 && d.PeekBits(8) == 0 {
|
if nullLen > 1 && d.PeekBits(8) == 0 {
|
||||||
d.SeekRel(8)
|
d.SeekRel(8)
|
||||||
}
|
}
|
||||||
|
4
go.mod
4
go.mod
@ -11,6 +11,10 @@ require (
|
|||||||
github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26
|
github.com/wader/gojq v0.12.1-0.20210903162226-412e6dd62f26
|
||||||
// fork of github.com/chzyer/readline
|
// fork of github.com/chzyer/readline
|
||||||
github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2
|
github.com/wader/readline v0.0.0-20210817095433-c868eb04b8b2
|
||||||
|
|
||||||
|
// bump: golang/text /golang\.org\/x\/text v(.*)/ git://github.com/golang/text|^0
|
||||||
|
// bump: golang/text command go get -d golang.org/x/text@v$LATEST && go mod tidy
|
||||||
|
golang.org/x/text v0.3.7
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
3
go.sum
3
go.sum
@ -22,6 +22,9 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7w
|
|||||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c=
|
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e h1:XMgFehsDnnLGtjvjOfqWSUzt0alpTR1RSEuznObga2c=
|
||||||
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/wader/fq/pkg/bitio"
|
"github.com/wader/fq/pkg/bitio"
|
||||||
|
"golang.org/x/text/encoding/unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: FP64,unsigned/BE/LE? rename SFP32?
|
// TODO: FP64,unsigned/BE/LE? rename SFP32?
|
||||||
@ -18,28 +19,19 @@ func (d *D) TryUTF8(nBytes int) (string, error) {
|
|||||||
|
|
||||||
func (d *D) TryUTF16BE(nBytes int) (string, error) {
|
func (d *D) TryUTF16BE(nBytes int) (string, error) {
|
||||||
b, err := d.bitBuf.BytesLen(nBytes)
|
b, err := d.bitBuf.BytesLen(nBytes)
|
||||||
// TODO: len check
|
|
||||||
rs := make([]rune, len(b)/2)
|
|
||||||
for i := 0; i < len(b)/2; i++ {
|
|
||||||
rs[i] = rune(uint(b[i*2])<<8 + uint(b[i*2+1]))
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return string(rs), nil
|
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *D) TryUTF16LE(nBytes int) (string, error) {
|
func (d *D) TryUTF16LE(nBytes int) (string, error) {
|
||||||
b, err := d.bitBuf.BytesLen(nBytes)
|
b, err := d.bitBuf.BytesLen(nBytes)
|
||||||
// TODO: len check
|
// TODO: len check
|
||||||
rs := make([]rune, len(b)/2)
|
|
||||||
for i := 0; i < len(b)/2; i++ {
|
|
||||||
rs[i] = rune(uint(b[i*2]) | uint(b[i*2+1])<<8)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return string(rs), nil
|
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder().String(string(b))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TryUTF8ShortString read pascal short string, max nBytes
|
// TryUTF8ShortString read pascal short string, max nBytes
|
||||||
|
@ -17,6 +17,7 @@ def args_parse($args; $opts):
|
|||||||
end;
|
end;
|
||||||
def _parse_without_arg($new_args; $optname):
|
def _parse_without_arg($new_args; $optname):
|
||||||
_parse($new_args; $flagmap; ($r | .parsed[$optname] = true));
|
_parse($new_args; $flagmap; ($r | .parsed[$optname] = true));
|
||||||
|
# this is to support --arg=VALUE
|
||||||
( ($args[0] | index("=")) as $assign_i
|
( ($args[0] | index("=")) as $assign_i
|
||||||
| ( if $assign_i then $args[0][0:$assign_i]
|
| ( if $assign_i then $args[0][0:$assign_i]
|
||||||
else $args[0]
|
else $args[0]
|
||||||
|
@ -214,13 +214,16 @@ func (i *Interp) readline(c interface{}, a []interface{}) interface{} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
src, err := i.os.Readline(prompt, func(line string, pos int) (newLine []string, shared int) {
|
src, err := i.os.Readline(
|
||||||
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
|
prompt,
|
||||||
defer completeCtxCancelFn()
|
func(line string, pos int) (newLine []string, shared int) {
|
||||||
// TODO: err
|
completeCtx, completeCtxCancelFn := context.WithTimeout(i.evalContext.ctx, 1*time.Second)
|
||||||
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
|
defer completeCtxCancelFn()
|
||||||
return names, shared
|
// TODO: err
|
||||||
})
|
names, shared, _ := completeTrampoline(completeCtx, completeFn, c, i, line, pos)
|
||||||
|
return names, shared
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
if errors.Is(err, ErrInterrupt) {
|
if errors.Is(err, ErrInterrupt) {
|
||||||
return valueError{"interrupt"}
|
return valueError{"interrupt"}
|
||||||
|
Loading…
Reference in New Issue
Block a user