2020-06-08 03:29:51 +03:00
|
|
|
package tar
|
|
|
|
|
|
|
|
// https://www.gnu.org/software/tar/manual/html_node/Standard.html
|
|
|
|
// TODO: extensions?
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2022-10-04 17:59:50 +03:00
|
|
|
"time"
|
2021-08-17 13:06:32 +03:00
|
|
|
|
|
|
|
"github.com/wader/fq/format"
|
|
|
|
"github.com/wader/fq/pkg/decode"
|
2022-07-16 19:39:57 +03:00
|
|
|
"github.com/wader/fq/pkg/interp"
|
2021-12-02 00:48:25 +03:00
|
|
|
"github.com/wader/fq/pkg/scalar"
|
2020-06-08 03:29:51 +03:00
|
|
|
)
|
|
|
|
|
2021-11-17 18:46:10 +03:00
|
|
|
// probeFormat is the format group tarDecode passes to FieldFormatOrRawLen when
// decoding each archive member's data. Its address is handed to the format
// registry as the PROBE dependency in init.
var probeFormat decode.Group
|
2020-06-08 03:29:51 +03:00
|
|
|
|
|
|
|
func init() {
|
2022-07-16 19:39:57 +03:00
|
|
|
interp.RegisterFormat(decode.Format{
|
2020-06-08 03:29:51 +03:00
|
|
|
Name: format.TAR,
|
|
|
|
Description: "Tar archive",
|
|
|
|
Groups: []string{format.PROBE},
|
|
|
|
DecodeFn: tarDecode,
|
|
|
|
Dependencies: []decode.Dependency{
|
2021-11-17 18:46:10 +03:00
|
|
|
{Names: []string{format.PROBE}, Group: &probeFormat},
|
2020-06-08 03:29:51 +03:00
|
|
|
},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2022-09-30 14:58:23 +03:00
|
|
|
// unixTimeEpochDate is the Unix epoch (1970-01-01T00:00:00Z) in UTC. The mtime
// header field is added to it as a number of seconds to build a readable
// timestamp description.
var unixTimeEpochDate = time.Unix(0, 0).UTC()
|
|
|
|
|
2022-07-19 19:33:50 +03:00
|
|
|
func tarDecode(d *decode.D, _ any) any {
|
2021-12-04 19:54:50 +03:00
|
|
|
const blockBytes = 512
|
|
|
|
const blockBits = blockBytes * 8
|
|
|
|
|
2022-09-30 14:58:23 +03:00
|
|
|
mapTrimSpaceNull := scalar.StrActualTrim(" \x00")
|
2021-12-04 19:54:50 +03:00
|
|
|
blockPadding := func(d *decode.D) int64 {
|
|
|
|
return (blockBits - (d.Pos() % blockBits)) % blockBits
|
2020-06-08 03:29:51 +03:00
|
|
|
}
|
|
|
|
|
2021-12-05 18:31:04 +03:00
|
|
|
// end marker is 512*2 zero bytes
|
2021-12-04 19:54:50 +03:00
|
|
|
endMarker := [blockBytes * 2]byte{}
|
2022-01-18 14:54:50 +03:00
|
|
|
var endMarkerStart int64
|
|
|
|
var endMarkerEnd int64
|
2022-01-18 14:12:32 +03:00
|
|
|
filesCount := 0
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2021-11-05 17:04:26 +03:00
|
|
|
d.FieldArray("files", func(d *decode.D) {
|
2020-06-08 03:29:51 +03:00
|
|
|
for !d.End() {
|
2021-11-05 17:04:26 +03:00
|
|
|
d.FieldStruct("file", func(d *decode.D) {
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("name", 100, mapTrimSpaceNull)
|
2022-09-30 14:58:23 +03:00
|
|
|
d.FieldUTF8NullFixedLen("mode", 8, scalar.TryStrSymParseUint(8))
|
|
|
|
d.FieldUTF8NullFixedLen("uid", 8, scalar.TryStrSymParseUint(8))
|
|
|
|
d.FieldUTF8NullFixedLen("gid", 8, scalar.TryStrSymParseUint(8))
|
|
|
|
size, sizeOk := d.FieldScalarUTF8NullFixedLen("size", 12, scalar.TryStrSymParseUint(8)).TrySymUint()
|
|
|
|
if !sizeOk {
|
2021-12-06 16:15:25 +03:00
|
|
|
d.Fatalf("could not decode size")
|
|
|
|
}
|
2022-09-30 14:58:23 +03:00
|
|
|
size *= 8
|
|
|
|
d.FieldUTF8NullFixedLen("mtime", 12, scalar.TryStrSymParseUint(8), scalar.StrFn(func(s scalar.Str) (scalar.Str, error) {
|
|
|
|
// TODO: string might not be a number, move to scalar?
|
|
|
|
if v, ok := s.TrySymUint(); ok {
|
|
|
|
s.Description = unixTimeEpochDate.Add(time.Duration(v) * time.Second).Format(time.RFC3339)
|
|
|
|
}
|
|
|
|
return s, nil
|
|
|
|
}))
|
|
|
|
d.FieldUTF8NullFixedLen("chksum", 8, scalar.TryStrSymParseUint(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("typeflag", 1, mapTrimSpaceNull)
|
|
|
|
d.FieldUTF8("linkname", 100, mapTrimSpaceNull)
|
2022-09-30 14:58:23 +03:00
|
|
|
d.FieldUTF8("magic", 6, mapTrimSpaceNull, d.StrAssert("ustar"))
|
|
|
|
d.FieldUTF8NullFixedLen("version", 2, scalar.TryStrSymParseUint(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("uname", 32, mapTrimSpaceNull)
|
|
|
|
d.FieldUTF8("gname", 32, mapTrimSpaceNull)
|
2022-09-30 14:58:23 +03:00
|
|
|
d.FieldUTF8NullFixedLen("devmajor", 8, scalar.TryStrSymParseUint(8))
|
|
|
|
d.FieldUTF8NullFixedLen("devminor", 8, scalar.TryStrSymParseUint(8))
|
2021-12-04 19:54:50 +03:00
|
|
|
d.FieldUTF8("prefix", 155, mapTrimSpaceNull)
|
|
|
|
d.FieldRawLen("header_block_padding", blockPadding(d), d.BitBufIsZero())
|
|
|
|
|
2022-09-30 14:58:23 +03:00
|
|
|
d.FieldFormatOrRawLen("data", int64(size), probeFormat, nil)
|
2021-12-04 19:54:50 +03:00
|
|
|
|
|
|
|
d.FieldRawLen("data_block_padding", blockPadding(d), d.BitBufIsZero())
|
2020-06-08 03:29:51 +03:00
|
|
|
})
|
2022-01-18 14:12:32 +03:00
|
|
|
filesCount++
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2022-01-18 14:54:50 +03:00
|
|
|
if d.BitsLeft() >= int64(len(endMarker))*8 && bytes.Equal(d.PeekBytes(len(endMarker)), endMarker[:]) {
|
|
|
|
endMarkerStart = d.Pos()
|
|
|
|
// consensus seems to be to allow more than 2 zero blocks at end
|
|
|
|
d.SeekRel(int64(len(endMarker)) * 8)
|
|
|
|
zeroBlock := [blockBytes]byte{}
|
|
|
|
for d.BitsLeft() >= blockBytes*8 && bytes.Equal(d.PeekBytes(blockBytes), zeroBlock[:]) {
|
|
|
|
d.SeekRel(int64(len(zeroBlock)) * 8)
|
|
|
|
}
|
|
|
|
endMarkerEnd = d.Pos()
|
2020-06-08 03:29:51 +03:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
2022-01-18 14:54:50 +03:00
|
|
|
endMarkerSize := endMarkerEnd - endMarkerStart
|
|
|
|
if endMarkerSize > 0 {
|
|
|
|
d.RangeFn(endMarkerStart, endMarkerSize, func(d *decode.D) {
|
|
|
|
d.FieldRawLen("end_marker", d.BitsLeft())
|
|
|
|
})
|
2022-01-18 14:12:32 +03:00
|
|
|
}
|
2020-06-08 03:29:51 +03:00
|
|
|
|
2022-01-18 14:12:32 +03:00
|
|
|
if filesCount == 0 {
|
2021-11-17 18:26:13 +03:00
|
|
|
d.Errorf("no files found")
|
2020-06-08 03:29:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|