1
1
mirror of https://github.com/wader/fq.git synced 2024-12-24 05:41:38 +03:00
fq/format/tar/tar.go

111 lines
3.4 KiB
Go
Raw Normal View History

2020-06-08 03:29:51 +03:00
package tar
// https://www.gnu.org/software/tar/manual/html_node/Standard.html
// TODO: extensions?
import (
"bytes"
"time"
"github.com/wader/fq/format"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/interp"
"github.com/wader/fq/pkg/scalar"
2020-06-08 03:29:51 +03:00
)
// probeGroup is the Out target of the format.Probe dependency declared in
// init; tarDecode passes it to FieldFormatOrRawLen when decoding each file's
// data.
var probeGroup decode.Group
2020-06-08 03:29:51 +03:00
func init() {
interp.RegisterFormat(
2023-05-01 14:19:04 +03:00
format.TAR,
&decode.Format{
Description: "Tar archive",
Groups: []*decode.Group{format.Probe},
DecodeFn: tarDecode,
Dependencies: []decode.Dependency{
{Groups: []*decode.Group{format.Probe}, Out: &probeGroup},
},
})
2020-06-08 03:29:51 +03:00
}
// unixTimeEpochDate is 1970-01-01T00:00:00 UTC, i.e. the Unix epoch.
var unixTimeEpochDate = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
func tarDecode(d *decode.D) any {
2021-12-04 19:54:50 +03:00
const blockBytes = 512
const blockBits = blockBytes * 8
mapTrimSpaceNull := scalar.StrActualTrim(" \x00")
2021-12-04 19:54:50 +03:00
blockPadding := func(d *decode.D) int64 {
return (blockBits - (d.Pos() % blockBits)) % blockBits
2020-06-08 03:29:51 +03:00
}
2021-12-05 18:31:04 +03:00
// end marker is 512*2 zero bytes
2021-12-04 19:54:50 +03:00
endMarker := [blockBytes * 2]byte{}
var endMarkerStart int64
var endMarkerEnd int64
filesCount := 0
2020-06-08 03:29:51 +03:00
d.FieldArray("files", func(d *decode.D) {
2020-06-08 03:29:51 +03:00
for !d.End() {
d.FieldStruct("file", func(d *decode.D) {
2021-12-04 19:54:50 +03:00
d.FieldUTF8("name", 100, mapTrimSpaceNull)
d.FieldUTF8NullFixedLen("mode", 8, scalar.TryStrSymParseUint(8))
d.FieldUTF8NullFixedLen("uid", 8, scalar.TryStrSymParseUint(8))
d.FieldUTF8NullFixedLen("gid", 8, scalar.TryStrSymParseUint(8))
size, sizeOk := d.FieldScalarUTF8NullFixedLen("size", 12, scalar.TryStrSymParseUint(8)).TrySymUint()
if !sizeOk {
d.Fatalf("could not decode size")
}
size *= 8
d.FieldUTF8NullFixedLen("mtime", 12, scalar.TryStrSymParseUint(8), scalar.StrFn(func(s scalar.Str) (scalar.Str, error) {
// TODO: string might not be a number, move to scalar?
if v, ok := s.TrySymUint(); ok {
s.Description = unixTimeEpochDate.Add(time.Duration(v) * time.Second).Format(time.RFC3339)
}
return s, nil
}))
d.FieldUTF8NullFixedLen("chksum", 8, scalar.TryStrSymParseUint(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("typeflag", 1, mapTrimSpaceNull)
d.FieldUTF8("linkname", 100, mapTrimSpaceNull)
d.FieldUTF8("magic", 6, mapTrimSpaceNull, d.StrAssert("ustar"))
d.FieldUTF8NullFixedLen("version", 2, scalar.TryStrSymParseUint(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("uname", 32, mapTrimSpaceNull)
d.FieldUTF8("gname", 32, mapTrimSpaceNull)
d.FieldUTF8NullFixedLen("devmajor", 8, scalar.TryStrSymParseUint(8))
d.FieldUTF8NullFixedLen("devminor", 8, scalar.TryStrSymParseUint(8))
2021-12-04 19:54:50 +03:00
d.FieldUTF8("prefix", 155, mapTrimSpaceNull)
d.FieldRawLen("header_block_padding", blockPadding(d), d.BitBufIsZero())
d.FieldFormatOrRawLen("data", int64(size), &probeGroup, format.Probe_In{})
2021-12-04 19:54:50 +03:00
d.FieldRawLen("data_block_padding", blockPadding(d), d.BitBufIsZero())
2020-06-08 03:29:51 +03:00
})
filesCount++
2020-06-08 03:29:51 +03:00
if d.BitsLeft() >= int64(len(endMarker))*8 && bytes.Equal(d.PeekBytes(len(endMarker)), endMarker[:]) {
endMarkerStart = d.Pos()
// consensus seems to be to allow more than 2 zero blocks at end
d.SeekRel(int64(len(endMarker)) * 8)
zeroBlock := [blockBytes]byte{}
for d.BitsLeft() >= blockBytes*8 && bytes.Equal(d.PeekBytes(blockBytes), zeroBlock[:]) {
d.SeekRel(int64(len(zeroBlock)) * 8)
}
endMarkerEnd = d.Pos()
2020-06-08 03:29:51 +03:00
break
}
}
})
endMarkerSize := endMarkerEnd - endMarkerStart
if endMarkerSize > 0 {
d.RangeFn(endMarkerStart, endMarkerSize, func(d *decode.D) {
d.FieldRawLen("end_marker", d.BitsLeft())
})
}
2020-06-08 03:29:51 +03:00
if filesCount == 0 {
d.Errorf("no files found")
2020-06-08 03:29:51 +03:00
}
return nil
}