mirror of
https://github.com/wader/fq.git
synced 2024-12-28 08:02:28 +03:00
gzip,bzip2: Calculate CRC
This commit is contained in:
parent
606c0b67ae
commit
ee611a489a
@ -7,7 +7,11 @@ package bzip2
|
||||
|
||||
import (
|
||||
"compress/bzip2"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"math/bits"
|
||||
|
||||
"github.com/wader/fq/format"
|
||||
"github.com/wader/fq/format/registry"
|
||||
@ -31,6 +35,18 @@ func init() {
|
||||
const blockMagic = 0x31_41_59_26_53_59
|
||||
const footerMagic = 0x17_72_45_38_50_90
|
||||
|
||||
// bitFlipReader wraps an io.Reader and reverses the bit order inside every
// byte read through it (bit 0 swaps with bit 7, bit 1 with bit 6, and so on).
//
// In this file it wraps the uncompressed block data before it is hashed with
// crc32.NewIEEE; the resulting sum is then bit-reversed as a whole.
// NOTE(review): presumably this is needed because bzip2's CRC-32 consumes
// bits in the opposite order to hash/crc32 — confirm against the format spec.
type bitFlipReader struct {
	r io.Reader
}

// Read reads from the wrapped reader into p and bit-reverses each of the n
// bytes that were actually read. Bytes in p[n:] are left untouched, and the
// wrapped reader's n and err are returned unchanged.
func (bfr bitFlipReader) Read(p []byte) (n int, err error) {
	n, err = bfr.r.Read(p)
	// Transform the bytes before looking at err: a reader may return n > 0
	// together with a non-nil error, and those bytes are still valid data.
	for i := 0; i < n; i++ {
		p[i] = bits.Reverse8(p[i])
	}
	return n, err
}
|
||||
|
||||
func bzip2Decode(d *decode.D, in interface{}) interface{} {
|
||||
// moreStreams := true
|
||||
|
||||
@ -38,6 +54,9 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
|
||||
// for moreStreams {
|
||||
// d.FieldStruct("stream", func(d *decode.D) {
|
||||
|
||||
var blockCRCValue *decode.Value
|
||||
var streamCRCN uint32
|
||||
|
||||
d.FieldUTF8("magic", 2, d.AssertStr("BZ"))
|
||||
d.FieldU8("version")
|
||||
d.FieldU8("hundred_k_blocksize")
|
||||
@ -49,6 +68,7 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
|
||||
// }
|
||||
d.FieldU48("magic", d.AssertU(blockMagic), d.Hex)
|
||||
d.FieldU32("crc", d.Hex)
|
||||
blockCRCValue = d.FieldGet("crc")
|
||||
d.FieldU1("randomised")
|
||||
d.FieldU24("origptr")
|
||||
d.FieldU16("syncmapl1")
|
||||
@ -93,12 +113,13 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
|
||||
d.FieldRootBitBuf("uncompressed", uncompressedBB)
|
||||
}
|
||||
|
||||
// uncompressed := &bytes.Buffer{}
|
||||
// crc32W := crc32.NewIEEE()
|
||||
// if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
|
||||
// d.Fatalf(err.Error())
|
||||
// }
|
||||
// // calculatedCRC32 := crc32W.Sum(nil)
|
||||
blockCRC32W := crc32.NewIEEE()
|
||||
if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Copy()}); err != nil {
|
||||
d.IOPanic(err)
|
||||
}
|
||||
blockCRC32N := bits.Reverse32(binary.BigEndian.Uint32(blockCRC32W.Sum(nil)))
|
||||
_ = blockCRCValue.TryScalarFn(d.ValidateU(uint64(blockCRC32N)))
|
||||
streamCRCN = blockCRC32N ^ ((streamCRCN << 1) | (streamCRCN >> 31))
|
||||
|
||||
// HACK: bzip2.NewReader will read from start of whole buffer and then we figure out compressedSize ourself
|
||||
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
|
||||
@ -118,12 +139,11 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
|
||||
d.FieldStruct("footer", func(d *decode.D) {
|
||||
d.FieldU48("magic", d.AssertU(footerMagic), d.Hex)
|
||||
// TODO: crc of block crcs
|
||||
d.FieldU32("crc", d.Hex)
|
||||
d.FieldU32("crc", d.Hex, d.ValidateU(uint64(streamCRCN)))
|
||||
d.FieldRawLen("padding", int64(d.ByteAlignBits()))
|
||||
})
|
||||
|
||||
// moreStreams = false
|
||||
|
||||
// }
|
||||
// })
|
||||
|
||||
|
4
format/bzip2/testdata/test.fqtest
vendored
4
format/bzip2/testdata/test.fqtest
vendored
@ -6,7 +6,7 @@ $ fq -d bzip2 verbose /test.bz2
|
||||
0x00| 39 | 9 | hundred_k_blocksize: 57 0x3-0x3.7 (1)
|
||||
| | | block: {} 0x4-0x1c.7 (25)
|
||||
0x00| 31 41 59 26 53 59 | 1AY&SY | magic: 0x314159265359 (valid) 0x4-0x9.7 (6)
|
||||
0x00| cc c3 71 d4 | ..q. | crc: 0xccc371d4 0xa-0xd.7 (4)
|
||||
0x00| cc c3 71 d4 | ..q. | crc: 0xccc371d4 (valid) 0xa-0xd.7 (4)
|
||||
0x00| 00 | . | randomised: 0 0xe-0xe (0.1)
|
||||
0x00| 00 00| ..| origptr: 4 0xe.1-0x11 (3)
|
||||
0x10|02 41 |.A |
|
||||
@ -24,5 +24,5 @@ $ fq -d bzip2 verbose /test.bz2
|
||||
0x20|19 97 8b |... |
|
||||
| | | footer: {} 0x22.1-0x2c.7 (10.7)
|
||||
0x20| 8b b9 22 9c 28 48 66 | ..".(Hf | magic: 0x177245385090 (valid) 0x22.1-0x28 (6)
|
||||
0x20| 66 61 b8 ea 00| | fa...| | crc: 0xccc371d4 0x28.1-0x2c (4)
|
||||
0x20| 66 61 b8 ea 00| | fa...| | crc: 0xccc371d4 (valid) 0x28.1-0x2c (4)
|
||||
0x20| 00| | .| | padding: raw bits 0x2c.1-0x2c.7 (0.7)
|
||||
|
@ -6,13 +6,13 @@ package gz
|
||||
|
||||
import (
|
||||
"compress/flate"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
|
||||
"github.com/wader/fq/format"
|
||||
"github.com/wader/fq/format/registry"
|
||||
"github.com/wader/fq/pkg/bitio"
|
||||
"github.com/wader/fq/pkg/decode"
|
||||
)
|
||||
|
||||
@ -115,7 +115,7 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
|
||||
if _, err := io.Copy(crc32W, uncompressedBB.Copy()); err != nil {
|
||||
d.IOPanic(err)
|
||||
}
|
||||
d.FieldRawLen("crc32", 32, d.ValidateBitBuf(bitio.ReverseBytes(crc32W.Sum(nil))), d.RawHex)
|
||||
d.FieldU32("crc32", d.ValidateU(uint64(binary.LittleEndian.Uint32(crc32W.Sum(nil)))), d.Hex)
|
||||
d.FieldU32LE("isize")
|
||||
|
||||
return nil
|
||||
|
2
format/gzip/testdata/test.fqtest
vendored
2
format/gzip/testdata/test.fqtest
vendored
@ -16,5 +16,5 @@ $ fq -d gzip verbose /test.gz
|
||||
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
|
||||
0x00| 2b 49 2d 2e e1 02| +I-...| compressed: raw bits 0xa-0x10.7 (7)
|
||||
0x10|00 |. |
|
||||
0x10| c6 35 b9 3b | .5.; | crc32: "c635b93b" (raw bits) (valid) 0x11-0x14.7 (4)
|
||||
0x10| c6 35 b9 3b | .5.; | crc32: 0xc635b93b (valid) 0x11-0x14.7 (4)
|
||||
0x10| 05 00 00 00| | ....| | isize: 5 0x15-0x18.7 (4)
|
||||
|
4
format/json/testdata/json.fqtest
vendored
4
format/json/testdata/json.fqtest
vendored
@ -31,13 +31,13 @@ $ fq . /json.gz
|
||||
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
|
||||
0x00| ab 56 4a 54 b2 52| .VJT.R| compressed: raw bits
|
||||
0x10|30 34 32 ae e5 02 00 |042.... |
|
||||
0x10| 20 ac d2 9c | ... | crc32: "20acd29c" (raw bits) (valid)
|
||||
0x10| 20 ac d2 9c | ... | crc32: 0x20acd29c (valid)
|
||||
0x10| 0b 00 00 00| | ....|| isize: 11
|
||||
$ fq tovalue /json.gz
|
||||
{
|
||||
"compressed": "<13>q1ZKVLJSMDQyruUCAA==",
|
||||
"compression_method": "deflate",
|
||||
"crc32": "20acd29c",
|
||||
"crc32": 548197020,
|
||||
"extra_flags": 0,
|
||||
"flags": {
|
||||
"comment": false,
|
||||
|
Loading…
Reference in New Issue
Block a user