1
1
mirror of https://github.com/wader/fq.git synced 2024-12-18 10:52:44 +03:00
fq/format/bzip2/bzip2.go
2021-09-12 13:08:50 +02:00

122 lines
3.0 KiB
Go

package bzip2
// https://en.wikipedia.org/wiki/Bzip2
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
// TODO: multiple streams, possible to figure out length of compressed? use footer magic?
// TODO: empty file, no streams
import (
"bytes"
"compress/bzip2"
"hash/crc32"
"io"
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
)
var probeFormat []*decode.Format
func init() {
registry.MustRegister(&decode.Format{
Name: format.BZIP2,
Description: "bzip2 compression",
Groups: []string{format.PROBE},
DecodeFn: gzDecode,
Dependencies: []decode.Dependency{
{Names: []string{format.PROBE}, Formats: &probeFormat},
},
})
}
func gzDecode(d *decode.D, in interface{}) interface{} {
// moreStreams := true
// d.FieldArrayFn("streams", func(d *decode.D) {
// for moreStreams {
// d.FieldStructFn("stream", func(d *decode.D) {
d.FieldValidateUTF8("magic", "BZ")
d.FieldU8("version")
d.FieldU8("hundred_k_blocksize")
d.FieldStructFn("block", func(d *decode.D) {
const blockHeaderMagic = 0x31_41_59_26_53_59
// if d.PeekBits(48) != blockHeaderMagic {
// moreStreams = false
// return
// }
d.FieldValidateUFn("compressed_magic", blockHeaderMagic, d.U48)
d.FieldU32("crc")
d.FieldU1("randomised")
d.FieldU24("origptr")
d.FieldU16("syncmapl1")
d.SeekRel(-16)
ranges := 0
for i := 0; i < 16; i++ {
if d.Bool() {
ranges++
}
}
d.FieldBitBufLen("syncmapl2", int64(ranges)*16)
numTrees := d.FieldU3("num_trees")
selectorsUsed := d.FieldU15("num_sels")
selectorsI := uint64(0)
d.FieldArrayLoopFn("selector_list", func() bool { return selectorsI < selectorsUsed }, func(d *decode.D) {
d.FieldU1("selector")
selectorsI++
})
treesI := uint64(0)
d.FieldArrayLoopFn("trees", func() bool { return treesI < numTrees }, func(d *decode.D) {
d.FieldUFn("tree", func() (uint64, decode.DisplayFormat, string) {
l := d.U5()
if !d.Bool() {
return l, decode.NumberDecimal, ""
}
if d.Bool() {
l--
} else {
l++
}
return l, decode.NumberDecimal, ""
})
treesI++
})
})
compressedBB := d.BitBufRange(0, d.Len())
deflateR := bzip2.NewReader(compressedBB)
uncompressed := &bytes.Buffer{}
crc32W := crc32.NewIEEE()
if _, err := io.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil { //nolint:gosec
d.Invalid(err.Error())
}
// calculatedCRC32 := crc32W.Sum(nil)
uncompressedBB := bitio.NewBufferFromBytes(uncompressed.Bytes(), -1)
dv, _, _ := d.FieldTryFormatBitBuf("uncompressed", uncompressedBB, probeFormat)
if dv == nil {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
// if calculatedCRC32 != nil {
// d.FieldChecksumLen("crc32", 32, calculatedCRC32, decode.LittleEndian)
// } else {
// d.FieldU32LE("crc32")
// }
// d.FieldU48("footer_magic")
// d.FieldU32("crc")
// byte align padding
// })
// moreStreams = false
// }
// })
return nil
}