mirror of
https://github.com/wader/fq.git
synced 2024-12-25 22:34:14 +03:00
cae288e6be
json, yaml, toml, xml, html, csv are now normal formats and most of them also particiate in probing (not html and csv). Also fixes a bunch of bugs in to/fromxml, to/fromjq etc.
243 lines
5.7 KiB
Go
243 lines
5.7 KiB
Go
package text
|
|
|
|
import (
|
|
"bytes"
|
|
"embed"
|
|
"encoding/base64"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/wader/fq/pkg/bitio"
|
|
"github.com/wader/fq/pkg/interp"
|
|
"golang.org/x/text/encoding"
|
|
"golang.org/x/text/encoding/charmap"
|
|
"golang.org/x/text/encoding/unicode"
|
|
)
|
|
|
|
//go:embed encoding.jq
|
|
var textFS embed.FS
|
|
|
|
func init() {
|
|
interp.RegisterFunc0("fromhex", func(_ *interp.Interp, c string) any {
|
|
b, err := hex.DecodeString(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bb, err := interp.NewBinaryFromBitReader(bitio.NewBitReader(b, -1), 8, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return bb
|
|
})
|
|
interp.RegisterFunc0("tohex", func(_ *interp.Interp, c string) any {
|
|
br, err := interp.ToBitReader(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
buf := &bytes.Buffer{}
|
|
if _, err := io.Copy(hex.NewEncoder(buf), bitio.NewIOReader(br)); err != nil {
|
|
return err
|
|
}
|
|
return buf.String()
|
|
})
|
|
|
|
// TODO: other encodings and share?
|
|
base64Encoding := func(enc string) *base64.Encoding {
|
|
switch enc {
|
|
case "url":
|
|
return base64.URLEncoding
|
|
case "rawstd":
|
|
return base64.RawStdEncoding
|
|
case "rawurl":
|
|
return base64.RawURLEncoding
|
|
default:
|
|
return base64.StdEncoding
|
|
}
|
|
}
|
|
type fromBase64Opts struct {
|
|
Encoding string
|
|
}
|
|
interp.RegisterFunc1("_frombase64", func(_ *interp.Interp, c string, opts fromBase64Opts) any {
|
|
b, err := base64Encoding(opts.Encoding).DecodeString(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bin, err := interp.NewBinaryFromBitReader(bitio.NewBitReader(b, -1), 8, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return bin
|
|
})
|
|
type toBase64Opts struct {
|
|
Encoding string
|
|
}
|
|
interp.RegisterFunc1("_tobase64", func(_ *interp.Interp, c string, opts toBase64Opts) any {
|
|
br, err := interp.ToBitReader(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bb := &bytes.Buffer{}
|
|
wc := base64.NewEncoder(base64Encoding(opts.Encoding), bb)
|
|
if _, err := io.Copy(wc, bitio.NewIOReader(br)); err != nil {
|
|
return err
|
|
}
|
|
wc.Close()
|
|
return bb.String()
|
|
})
|
|
|
|
strEncoding := func(s string) encoding.Encoding {
|
|
switch s {
|
|
case "UTF8":
|
|
return unicode.UTF8
|
|
case "UTF16":
|
|
return unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
|
|
case "UTF16LE":
|
|
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
|
|
case "UTF16BE":
|
|
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
|
|
case "CodePage037":
|
|
return charmap.CodePage037
|
|
case "CodePage437":
|
|
return charmap.CodePage437
|
|
case "CodePage850":
|
|
return charmap.CodePage850
|
|
case "CodePage852":
|
|
return charmap.CodePage852
|
|
case "CodePage855":
|
|
return charmap.CodePage855
|
|
case "CodePage858":
|
|
return charmap.CodePage858
|
|
case "CodePage860":
|
|
return charmap.CodePage860
|
|
case "CodePage862":
|
|
return charmap.CodePage862
|
|
case "CodePage863":
|
|
return charmap.CodePage863
|
|
case "CodePage865":
|
|
return charmap.CodePage865
|
|
case "CodePage866":
|
|
return charmap.CodePage866
|
|
case "CodePage1047":
|
|
return charmap.CodePage1047
|
|
case "CodePage1140":
|
|
return charmap.CodePage1140
|
|
case "ISO8859_1":
|
|
return charmap.ISO8859_1
|
|
case "ISO8859_2":
|
|
return charmap.ISO8859_2
|
|
case "ISO8859_3":
|
|
return charmap.ISO8859_3
|
|
case "ISO8859_4":
|
|
return charmap.ISO8859_4
|
|
case "ISO8859_5":
|
|
return charmap.ISO8859_5
|
|
case "ISO8859_6":
|
|
return charmap.ISO8859_6
|
|
case "ISO8859_6E":
|
|
return charmap.ISO8859_6E
|
|
case "ISO8859_6I":
|
|
return charmap.ISO8859_6I
|
|
case "ISO8859_7":
|
|
return charmap.ISO8859_7
|
|
case "ISO8859_8":
|
|
return charmap.ISO8859_8
|
|
case "ISO8859_8E":
|
|
return charmap.ISO8859_8E
|
|
case "ISO8859_8I":
|
|
return charmap.ISO8859_8I
|
|
case "ISO8859_9":
|
|
return charmap.ISO8859_9
|
|
case "ISO8859_10":
|
|
return charmap.ISO8859_10
|
|
case "ISO8859_13":
|
|
return charmap.ISO8859_13
|
|
case "ISO8859_14":
|
|
return charmap.ISO8859_14
|
|
case "ISO8859_15":
|
|
return charmap.ISO8859_15
|
|
case "ISO8859_16":
|
|
return charmap.ISO8859_16
|
|
case "KOI8R":
|
|
return charmap.KOI8R
|
|
case "KOI8U":
|
|
return charmap.KOI8U
|
|
case "Macintosh":
|
|
return charmap.Macintosh
|
|
case "MacintoshCyrillic":
|
|
return charmap.MacintoshCyrillic
|
|
case "Windows874":
|
|
return charmap.Windows874
|
|
case "Windows1250":
|
|
return charmap.Windows1250
|
|
case "Windows1251":
|
|
return charmap.Windows1251
|
|
case "Windows1252":
|
|
return charmap.Windows1252
|
|
case "Windows1253":
|
|
return charmap.Windows1253
|
|
case "Windows1254":
|
|
return charmap.Windows1254
|
|
case "Windows1255":
|
|
return charmap.Windows1255
|
|
case "Windows1256":
|
|
return charmap.Windows1256
|
|
case "Windows1257":
|
|
return charmap.Windows1257
|
|
case "Windows1258":
|
|
return charmap.Windows1258
|
|
case "XUserDefined":
|
|
return charmap.XUserDefined
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
type toStrEncodingOpts struct {
|
|
Encoding string
|
|
}
|
|
interp.RegisterFunc1("_tostrencoding", func(_ *interp.Interp, c string, opts toStrEncodingOpts) any {
|
|
h := strEncoding(opts.Encoding)
|
|
if h == nil {
|
|
return fmt.Errorf("unknown string encoding %s", opts.Encoding)
|
|
}
|
|
|
|
bb := &bytes.Buffer{}
|
|
if _, err := io.Copy(h.NewEncoder().Writer(bb), strings.NewReader(c)); err != nil {
|
|
return err
|
|
}
|
|
outBR := bitio.NewBitReader(bb.Bytes(), -1)
|
|
bin, err := interp.NewBinaryFromBitReader(outBR, 8, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return bin
|
|
})
|
|
|
|
type fromStrEncodingOpts struct {
|
|
Encoding string
|
|
}
|
|
interp.RegisterFunc1("_fromstrencoding", func(_ *interp.Interp, c any, opts fromStrEncodingOpts) any {
|
|
inBR, err := interp.ToBitReader(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
h := strEncoding(opts.Encoding)
|
|
if h == nil {
|
|
return fmt.Errorf("unknown string encoding %s", opts.Encoding)
|
|
}
|
|
|
|
bb := &bytes.Buffer{}
|
|
if _, err := io.Copy(bb, h.NewDecoder().Reader(bitio.NewIOReader(inBR))); err != nil {
|
|
|
|
return err
|
|
}
|
|
|
|
return bb.String()
|
|
})
|
|
|
|
interp.RegisterFS(textFS)
|
|
}
|