2020-06-08 03:29:51 +03:00
|
|
|
package tar
|
|
|
|
|
|
|
|
// https://www.gnu.org/software/tar/manual/html_node/Standard.html
|
|
|
|
// TODO: extensions?
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2021-08-17 13:06:32 +03:00
|
|
|
|
|
|
|
"github.com/wader/fq/format"
|
|
|
|
"github.com/wader/fq/format/registry"
|
|
|
|
"github.com/wader/fq/pkg/decode"
|
2021-12-02 00:48:25 +03:00
|
|
|
"github.com/wader/fq/pkg/scalar"
|
2020-06-08 03:29:51 +03:00
|
|
|
)
|
|
|
|
|
2021-11-17 18:46:10 +03:00
|
|
|
// probeFormat is the format group used when trying to decode the data of each
// file in the archive. init passes a pointer to it as the format.PROBE entry
// in the tar format's Dependencies.
var probeFormat decode.Group
|
2020-06-08 03:29:51 +03:00
|
|
|
|
|
|
|
func init() {
|
2021-11-17 18:46:10 +03:00
|
|
|
registry.MustRegister(decode.Format{
|
2020-06-08 03:29:51 +03:00
|
|
|
Name: format.TAR,
|
|
|
|
Description: "Tar archive",
|
|
|
|
Groups: []string{format.PROBE},
|
|
|
|
DecodeFn: tarDecode,
|
|
|
|
Dependencies: []decode.Dependency{
|
2021-11-17 18:46:10 +03:00
|
|
|
{Names: []string{format.PROBE}, Group: &probeFormat},
|
2020-06-08 03:29:51 +03:00
|
|
|
},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func tarDecode(d *decode.D, in interface{}) interface{} {
|
2021-12-04 19:54:50 +03:00
|
|
|
const blockBytes = 512
|
|
|
|
const blockBits = blockBytes * 8
|
|
|
|
|
|
|
|
mapTrimSpaceNull := scalar.Trim(" \x00")
|
|
|
|
blockPadding := func(d *decode.D) int64 {
|
|
|
|
return (blockBits - (d.Pos() % blockBits)) % blockBits
|
2020-06-08 03:29:51 +03:00
|
|
|
}
|
|
|
|
|
2021-12-05 18:31:04 +03:00
|
|
|
// end marker is 512*2 zero bytes
|
2021-12-04 19:54:50 +03:00
|
|
|
endMarker := [blockBytes * 2]byte{}
|
2022-01-18 14:54:50 +03:00
|
|
|
var endMarkerStart int64
|
|
|
|
var endMarkerEnd int64
|
2022-01-18 14:12:32 +03:00
|
|
|
filesCount := 0
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2021-11-05 17:04:26 +03:00
|
|
|
d.FieldArray("files", func(d *decode.D) {
|
2020-06-08 03:29:51 +03:00
|
|
|
for !d.End() {
|
2021-11-05 17:04:26 +03:00
|
|
|
d.FieldStruct("file", func(d *decode.D) {
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("name", 100, mapTrimSpaceNull)
|
2022-01-28 19:18:30 +03:00
|
|
|
d.FieldUTF8NullFixedLen("mode", 8, scalar.StrUintToSym(8))
|
|
|
|
d.FieldUTF8NullFixedLen("uid", 8, scalar.StrUintToSym(8))
|
|
|
|
d.FieldUTF8NullFixedLen("gid", 8, scalar.StrUintToSym(8))
|
|
|
|
sizeS := d.FieldScalarUTF8NullFixedLen("size", 12, scalar.StrUintToSym(8))
|
2021-12-06 16:15:25 +03:00
|
|
|
if sizeS.Sym == nil {
|
|
|
|
d.Fatalf("could not decode size")
|
|
|
|
}
|
|
|
|
size := int64(sizeS.SymU()) * 8
|
2022-01-28 19:18:30 +03:00
|
|
|
d.FieldUTF8NullFixedLen("mtime", 12, scalar.StrUintToSym(8))
|
|
|
|
d.FieldUTF8NullFixedLen("chksum", 8, scalar.StrUintToSym(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("typeflag", 1, mapTrimSpaceNull)
|
|
|
|
d.FieldUTF8("linkname", 100, mapTrimSpaceNull)
|
2022-01-18 14:12:32 +03:00
|
|
|
d.FieldUTF8("magic", 6, mapTrimSpaceNull, d.AssertStr("ustar"))
|
2022-01-28 19:18:30 +03:00
|
|
|
d.FieldUTF8NullFixedLen("version", 2, scalar.StrUintToSym(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("uname", 32, mapTrimSpaceNull)
|
|
|
|
d.FieldUTF8("gname", 32, mapTrimSpaceNull)
|
2022-01-28 19:18:30 +03:00
|
|
|
d.FieldUTF8NullFixedLen("devmajor", 8, scalar.StrUintToSym(8))
|
|
|
|
d.FieldUTF8NullFixedLen("devminor", 8, scalar.StrUintToSym(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("prefix", 155, mapTrimSpaceNull)
|
|
|
|
d.FieldRawLen("header_block_padding", blockPadding(d), d.BitBufIsZero())
|
|
|
|
|
2021-12-06 16:15:25 +03:00
|
|
|
dv, _, _ := d.TryFieldFormatLen("data", size, probeFormat, nil)
|
2021-12-04 19:54:50 +03:00
|
|
|
if dv == nil {
|
2021-12-06 16:15:25 +03:00
|
|
|
d.FieldRawLen("data", size)
|
2020-06-08 03:29:51 +03:00
|
|
|
}
|
2021-12-04 19:54:50 +03:00
|
|
|
|
|
|
|
d.FieldRawLen("data_block_padding", blockPadding(d), d.BitBufIsZero())
|
2020-06-08 03:29:51 +03:00
|
|
|
})
|
2022-01-18 14:12:32 +03:00
|
|
|
filesCount++
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2022-01-18 14:54:50 +03:00
|
|
|
if d.BitsLeft() >= int64(len(endMarker))*8 && bytes.Equal(d.PeekBytes(len(endMarker)), endMarker[:]) {
|
|
|
|
endMarkerStart = d.Pos()
|
|
|
|
// consensus seems to be to allow more than 2 zero blocks at end
|
|
|
|
d.SeekRel(int64(len(endMarker)) * 8)
|
|
|
|
zeroBlock := [blockBytes]byte{}
|
|
|
|
for d.BitsLeft() >= blockBytes*8 && bytes.Equal(d.PeekBytes(blockBytes), zeroBlock[:]) {
|
|
|
|
d.SeekRel(int64(len(zeroBlock)) * 8)
|
|
|
|
}
|
|
|
|
endMarkerEnd = d.Pos()
|
2020-06-08 03:29:51 +03:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
2022-01-18 14:54:50 +03:00
|
|
|
endMarkerSize := endMarkerEnd - endMarkerStart
|
|
|
|
if endMarkerSize > 0 {
|
|
|
|
d.RangeFn(endMarkerStart, endMarkerSize, func(d *decode.D) {
|
|
|
|
d.FieldRawLen("end_marker", d.BitsLeft())
|
|
|
|
})
|
2022-01-18 14:12:32 +03:00
|
|
|
}
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2022-01-18 14:12:32 +03:00
|
|
|
if filesCount == 0 {
|
2021-11-17 18:26:13 +03:00
|
|
|
d.Errorf("no files found")
|
2020-06-08 03:29:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|