1
1
mirror of https://github.com/wader/fq.git synced 2024-11-28 03:02:55 +03:00
fq/format/tar/tar.go

104 lines
3.1 KiB
Go
Raw Normal View History

2020-06-08 03:29:51 +03:00
package tar
// Format reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
// TODO: decode extensions to the standard header format?
import (
"bytes"
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/scalar"
2020-06-08 03:29:51 +03:00
)
// probeFormat is the format group tarDecode tries on each file's data.
// init passes its address as the Group of the format.PROBE dependency;
// presumably the registry fills it in at registration/resolve time (confirm
// against the registry package).
var probeFormat decode.Group
2020-06-08 03:29:51 +03:00
func init() {
registry.MustRegister(decode.Format{
2020-06-08 03:29:51 +03:00
Name: format.TAR,
Description: "Tar archive",
Groups: []string{format.PROBE},
DecodeFn: tarDecode,
Dependencies: []decode.Dependency{
{Names: []string{format.PROBE}, Group: &probeFormat},
2020-06-08 03:29:51 +03:00
},
})
}
func tarDecode(d *decode.D, in interface{}) interface{} {
2021-12-04 19:54:50 +03:00
const blockBytes = 512
const blockBits = blockBytes * 8
mapTrimSpaceNull := scalar.Trim(" \x00")
blockPadding := func(d *decode.D) int64 {
return (blockBits - (d.Pos() % blockBits)) % blockBits
2020-06-08 03:29:51 +03:00
}
2021-12-05 18:31:04 +03:00
// end marker is 512*2 zero bytes
2021-12-04 19:54:50 +03:00
endMarker := [blockBytes * 2]byte{}
var endMarkerStart int64
var endMarkerEnd int64
filesCount := 0
2020-06-08 03:29:51 +03:00
d.FieldArray("files", func(d *decode.D) {
2020-06-08 03:29:51 +03:00
for !d.End() {
d.FieldStruct("file", func(d *decode.D) {
2021-12-04 19:54:50 +03:00
d.FieldUTF8("name", 100, mapTrimSpaceNull)
d.FieldUTF8NullFixedLen("mode", 8, scalar.StrUintToSym(8))
d.FieldUTF8NullFixedLen("uid", 8, scalar.StrUintToSym(8))
d.FieldUTF8NullFixedLen("gid", 8, scalar.StrUintToSym(8))
sizeS := d.FieldScalarUTF8NullFixedLen("size", 12, scalar.StrUintToSym(8))
if sizeS.Sym == nil {
d.Fatalf("could not decode size")
}
size := int64(sizeS.SymU()) * 8
d.FieldUTF8NullFixedLen("mtime", 12, scalar.StrUintToSym(8))
d.FieldUTF8NullFixedLen("chksum", 8, scalar.StrUintToSym(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("typeflag", 1, mapTrimSpaceNull)
d.FieldUTF8("linkname", 100, mapTrimSpaceNull)
d.FieldUTF8("magic", 6, mapTrimSpaceNull, d.AssertStr("ustar"))
d.FieldUTF8NullFixedLen("version", 2, scalar.StrUintToSym(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("uname", 32, mapTrimSpaceNull)
d.FieldUTF8("gname", 32, mapTrimSpaceNull)
d.FieldUTF8NullFixedLen("devmajor", 8, scalar.StrUintToSym(8))
d.FieldUTF8NullFixedLen("devminor", 8, scalar.StrUintToSym(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("prefix", 155, mapTrimSpaceNull)
d.FieldRawLen("header_block_padding", blockPadding(d), d.BitBufIsZero())
dv, _, _ := d.TryFieldFormatLen("data", size, probeFormat, nil)
2021-12-04 19:54:50 +03:00
if dv == nil {
d.FieldRawLen("data", size)
2020-06-08 03:29:51 +03:00
}
2021-12-04 19:54:50 +03:00
d.FieldRawLen("data_block_padding", blockPadding(d), d.BitBufIsZero())
2020-06-08 03:29:51 +03:00
})
filesCount++
2020-06-08 03:29:51 +03:00
if d.BitsLeft() >= int64(len(endMarker))*8 && bytes.Equal(d.PeekBytes(len(endMarker)), endMarker[:]) {
endMarkerStart = d.Pos()
// consensus seems to be to allow more than 2 zero blocks at end
d.SeekRel(int64(len(endMarker)) * 8)
zeroBlock := [blockBytes]byte{}
for d.BitsLeft() >= blockBytes*8 && bytes.Equal(d.PeekBytes(blockBytes), zeroBlock[:]) {
d.SeekRel(int64(len(zeroBlock)) * 8)
}
endMarkerEnd = d.Pos()
2020-06-08 03:29:51 +03:00
break
}
}
})
endMarkerSize := endMarkerEnd - endMarkerStart
if endMarkerSize > 0 {
d.RangeFn(endMarkerStart, endMarkerSize, func(d *decode.D) {
d.FieldRawLen("end_marker", d.BitsLeft())
})
}
2020-06-08 03:29:51 +03:00
if filesCount == 0 {
d.Errorf("no files found")
2020-06-08 03:29:51 +03:00
}
return nil
}