1
1
mirror of https://github.com/wader/fq.git synced 2024-09-11 12:05:39 +03:00

leveldb: fix metaindex keys, refactoring, and jq syntax per PR

This commit is contained in:
Michael B. 2023-12-07 17:11:55 +01:00
parent 3a396e156e
commit 287ed366f6
6 changed files with 104 additions and 74 deletions

View File

@ -118,6 +118,8 @@ func readLengthPrefixedString(name string, d *decode.D) {
// readTagInternalKey decodes a length-prefixed internal key as a struct field
// called name: a ULEB128 `length` followed by the key itself under `data`,
// which is decoded by readInternalKey.
func readTagInternalKey(name string, d *decode.D) {
d.FieldStruct(name, func(d *decode.D) {
// The ULEB128 length read here is forwarded to readInternalKey as the key's size.
length := d.FieldULEB128("length")
readInternalKey("data", int64(length), d)
d.FieldStruct("data", func(d *decode.D) {
readInternalKey(int(length), d)
})
})
}

View File

@ -3,40 +3,35 @@
# and keep the original order?
def _leveldb_descriptor_torepr:
def _f:
( if .type == "root" then
(
[
.value.blocks[].records[]
| {type: "record", value: .}
| _f
if .type == "root" then
[ .value.blocks[].records[]
| {type: "record", value: .}
| _f
]
elif .type == "record" then
if .value.header.record_type != "full" then
empty
else
[ .value.data.tags[]
| {type: "tag", value: {(.key): .value}}
| _f
]
)
elif .type == "record" then
(
if .value.header.record_type != "full" then
empty
else
[
.value.data.tags[]
| {type: "tag", value: {(.key): .value}}
| _f
]
end
elif .type == "tag" then
( .value
| if .comparator then .comparator |= .data else . end
| if .new_file then
( .new_file.largest_internal_key |= .data
| .new_file.smallest_internal_key |= .data
)
else .
end
| if .compact_pointer then
.compact_pointer.internal_key |= .data
else .
end
)
elif .type == "tag" then
(
.value
| if .comparator then (.comparator |= .data) else . end
| if .new_file then (
.new_file.largest_internal_key |= .data
| .new_file.smallest_internal_key |= .data
) else . end
| if .compact_pointer then (
.compact_pointer.internal_key |= .data
) else . end
)
end
);
end;
( {type: "root", value: .}
| _f
);
);

View File

@ -144,6 +144,7 @@ func readLogRecord(rro recordReadOptions, d *decode.D) {
rro.readDataFn(dataSize, recordType, d)
}
// simplified `functools.partial` (Python) or `Function.prototype.bind` (JavaScript)
func bind(f func(recordReadOptions, *decode.D), rro recordReadOptions) func(*decode.D) {
return func(d *decode.D) {
f(rro, d)

View File

@ -10,7 +10,6 @@ package leveldb
import (
"bytes"
"embed"
"fmt"
"hash/crc32"
"github.com/golang/snappy"
@ -22,7 +21,7 @@ import (
)
//go:embed leveldb_table.md
var leveldbFS embed.FS
var leveldbTableFS embed.FS
func init() {
interp.RegisterFormat(
@ -32,7 +31,7 @@ func init() {
Groups: []*decode.Group{format.Probe},
DecodeFn: ldbTableDecode,
})
interp.RegisterFS(leveldbFS)
interp.RegisterFS(leveldbTableFS)
}
const (
@ -44,8 +43,8 @@ const (
// echo http://code.google.com/p/leveldb/ | sha1sum
// https://github.com/google/leveldb/blob/main/table/format.h#L76
tableMagicNumber = 0xdb4775248b80fb57
uint32Size = int64(32)
uint64Size = int64(64)
uint32Size = 32
uint64Size = 64
)
// https://github.com/google/leveldb/blob/main/include/leveldb/options.h#L25
@ -105,7 +104,7 @@ func ldbTableDecode(d *decode.D) any {
d.SeekAbs(metaIndexOffset * 8)
var metaHandles []blockHandle
readTableBlock("metaindex", metaIndexSize, readKeyValueContents, func(d *decode.D) {
readTableBlock("metaindex", metaIndexSize, keyValueContentsReader(nil, func(size int, d *decode.D) {
// blockHandle
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
handle := blockHandle{
@ -113,13 +112,13 @@ func ldbTableDecode(d *decode.D) any {
size: d.FieldULEB128("size"),
}
metaHandles = append(metaHandles, handle)
}, d)
}), d)
// index
d.SeekAbs(indexOffset * 8)
var dataHandles []blockHandle
readTableBlock("index", indexSize, readKeyValueContents, func(d *decode.D) {
readTableBlock("index", indexSize, keyValueContentsReader(readInternalKey, func(size int, d *decode.D) {
// blockHandle
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
handle := blockHandle{
@ -127,7 +126,7 @@ func ldbTableDecode(d *decode.D) any {
size: d.FieldULEB128("size"),
}
dataHandles = append(dataHandles, handle)
}, d)
}), d)
// meta
@ -135,7 +134,7 @@ func ldbTableDecode(d *decode.D) any {
d.FieldArray("meta", func(d *decode.D) {
for _, handle := range metaHandles {
d.SeekAbs(int64(handle.offset) * 8)
readTableBlock("meta_block", int64(handle.size), readMetaContent, nil, d)
readTableBlock("meta_block", int64(handle.size), readMetaContent, d)
}
})
}
@ -146,7 +145,12 @@ func ldbTableDecode(d *decode.D) any {
d.FieldArray("data", func(d *decode.D) {
for _, handle := range dataHandles {
d.SeekAbs(int64(handle.offset) * 8)
readTableBlock("data_block", int64(handle.size), readKeyValueContents, nil, d)
readTableBlock(
"data_block",
int64(handle.size),
keyValueContentsReader(readInternalKey, nil),
d,
)
}
})
}
@ -159,7 +163,12 @@ func ldbTableDecode(d *decode.D) any {
// Read block contents as well as compression + checksum bytes following it.
// The function `readTableBlockContents` gets the uncompressed bytebuffer.
// https://github.com/google/leveldb/blob/main/table/format.cc#L69
func readTableBlock(name string, size int64, readTableBlockContents func(size int64, valueCallbackFn func(d *decode.D), d *decode.D), valueCallbackFn func(d *decode.D), d *decode.D) {
func readTableBlock(
name string,
size int64,
readTableBlockContents func(size int64, d *decode.D),
d *decode.D,
) {
d.FieldStruct(name, func(d *decode.D) {
start := d.Pos()
br := d.RawLen(size * 8)
@ -174,13 +183,12 @@ func readTableBlock(name string, size int64, readTableBlockContents func(size in
d.SeekAbs(start)
if compressionType == compressionTypeNone {
d.FieldStruct("uncompressed", func(d *decode.D) {
readTableBlockContents(size, valueCallbackFn, d)
readTableBlockContents(size, d)
})
} else {
compressedSize := size
compressed := data
bb := &bytes.Buffer{}
fmt.Println(bb, bb.Len())
switch compressionType {
case compressionTypeSnappy:
decompressed, err := snappy.Decode(nil, compressed)
@ -192,9 +200,13 @@ func readTableBlock(name string, size int64, readTableBlockContents func(size in
d.Errorf("Unsupported compression type: %x", compressionType)
}
if bb.Len() > 0 {
d.FieldStructRootBitBufFn("uncompressed", bitio.NewBitReader(bb.Bytes(), -1), func(d *decode.D) {
readTableBlockContents(int64(bb.Len()), valueCallbackFn, d)
})
d.FieldStructRootBitBufFn(
"uncompressed",
bitio.NewBitReader(bb.Bytes(), -1),
func(d *decode.D) {
readTableBlockContents(int64(bb.Len()), d)
},
)
}
d.FieldRawLen("compressed", compressedSize*8)
}
@ -205,7 +217,12 @@ func readTableBlock(name string, size int64, readTableBlockContents func(size in
// Read content encoded as a sequence of key/value-entries and a trailer of restarts.
// https://github.com/google/leveldb/blob/main/table/block_builder.cc#L16
// https://github.com/google/leveldb/blob/main/table/block.cc
func readKeyValueContents(size int64, valueCallbackFn func(d *decode.D), d *decode.D) {
func readKeyValueContents(
keyCallbackFn func(size int, d *decode.D),
valueCallbackFn func(size int, d *decode.D),
size int64,
d *decode.D,
) {
start := d.Pos()
end := start + size*8
@ -231,39 +248,54 @@ func readKeyValueContents(size int64, valueCallbackFn func(d *decode.D), d *deco
for d.Pos() < start+restartOffset {
d.FieldStruct("entry", func(d *decode.D) {
d.FieldULEB128("shared_bytes")
unshared := int64(d.FieldULEB128("unshared_bytes"))
valueLength := d.FieldULEB128("value_length")
readInternalKey("internal_key", unshared, d)
if valueCallbackFn == nil {
d.FieldUTF8("value", int(valueLength))
unshared := int(d.FieldULEB128("unshared_bytes"))
valueLength := int(d.FieldULEB128("value_length"))
if keyCallbackFn == nil {
d.FieldUTF8("key", unshared)
} else {
d.FieldStruct("value", valueCallbackFn)
d.FieldStruct("key", func(d *decode.D) {
keyCallbackFn(unshared, d)
})
}
if valueCallbackFn == nil {
d.FieldUTF8("value", valueLength)
} else {
d.FieldStruct("value", func(d *decode.D) {
valueCallbackFn(valueLength, d)
})
}
})
}
})
}
// readInternalKey decodes the fields of a LevelDB InternalKey: a UTF-8
// `user_key`, followed by a one-byte `type` (mapped through valueTypes and
// displayed as hex) and a 56-bit `sequence_number`.
//
// NOTE(review): despite its name, bitSize appears to be a byte count —
// callers pass ULEB128 lengths such as `unshared_bytes`, and uint64Size/8
// converts the 64-bit type+sequence trailer to bytes before subtracting it.
// Consider renaming the parameter. Also confirm callers never pass a size
// smaller than 8, which would make the `user_key` length negative.
func readInternalKey(name string, bitSize int64, d *decode.D) {
func readInternalKey(bitSize int, d *decode.D) {
// InternalKey
// https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
d.FieldStruct(name, func(d *decode.D) {
// user_key takes whatever remains after the trailing 8 bytes (type + sequence number).
d.FieldUTF8("user_key", int(bitSize-uint64Size/8))
d.FieldU8("type", valueTypes, scalar.UintHex)
d.FieldU56("sequence_number")
})
// user_key takes whatever remains after the trailing 8 bytes (type + sequence number).
d.FieldUTF8("user_key", bitSize-uint64Size/8)
d.FieldU8("type", valueTypes, scalar.UintHex)
d.FieldU56("sequence_number")
}
// Read content encoded in the "filter" or "stats" Meta Block format.
// https://github.com/google/leveldb/blob/main/doc/table_format.md#filter-meta-block
// https://github.com/google/leveldb/blob/main/table/filter_block.cc
//
// For now the block is not decoded structurally: its size bytes are exposed
// as a single `raw` field.
func readMetaContent(size int64, valueCallbackFn func(d *decode.D), d *decode.D) {
func readMetaContent(size int64, d *decode.D) {
// TK(2023-12-04): structured decoding of the meta block is still to come;
// until then the contents are emitted as raw bytes.
d.FieldRawLen("raw", size*8) // size is in bytes; FieldRawLen is given the length in bits
}
// Helpers
// keyValueContentsReader binds a key callback and a value callback to
// readKeyValueContents, yielding a reader with the (size, d) signature that
// readTableBlock accepts for its block-contents argument.
//
// Either callback may be nil; readKeyValueContents then decodes that part of
// each entry as a UTF-8 string instead of a struct.
func keyValueContentsReader(
	keyCallbackFn func(size int, d *decode.D),
	valueCallbackFn func(size int, d *decode.D),
) func(size int64, d *decode.D) {
	// Capture both callbacks so only the block size and decoder remain open.
	contentsReader := func(blockSize int64, dec *decode.D) {
		readKeyValueContents(keyCallbackFn, valueCallbackFn, blockSize, dec)
	}
	return contentsReader
}
// Compute the checksum: a CRC32 as in RFC3720 + custom mask.
// https://datatracker.ietf.org/doc/html/rfc3720#appendix-B.4
func computeChecksum(bytes []uint8) uint32 {

View File

@ -8,7 +8,7 @@ $ fq -d leveldb_table dv snappy.ldb/000005.ldb
0x000|00 |. | shared_bytes: 0 0x0-0x1 (1)
0x000| 13 | . | unshared_bytes: 19 0x1-0x2 (1)
0x000| bd 03 | .. | value_length: 445 0x2-0x4 (2)
| | | internal_key{}: 0x4-0x17 (19)
| | | key{}: 0x4-0x17 (19)
0x000| 6c 6f 72 65 6d 2e 64 6f 6c 6f 72 | lorem.dolor | user_key: "lorem.dolor" 0x4-0xf (11)
0x000| 01| .| type: "value" (0x1) 0xf-0x10 (1)
0x001|03 00 00 00 00 00 00 |....... | sequence_number: 3 0x10-0x17 (7)
@ -19,7 +19,7 @@ $ fq -d leveldb_table dv snappy.ldb/000005.ldb
0x01d| 06 | . | shared_bytes: 6 0x1d4-0x1d5 (1)
0x01d| 0d | . | unshared_bytes: 13 0x1d5-0x1d6 (1)
0x01d| bd 03 | .. | value_length: 445 0x1d6-0x1d8 (2)
| | | internal_key{}: 0x1d8-0x1e5 (13)
| | | key{}: 0x1d8-0x1e5 (13)
0x01d| 69 70 73 75 6d | ipsum | user_key: "ipsum" 0x1d8-0x1dd (5)
0x01d| 01 | . | type: "value" (0x1) 0x1dd-0x1de (1)
0x01d| 02 00| ..| sequence_number: 2 0x1de-0x1e5 (7)
@ -31,7 +31,7 @@ $ fq -d leveldb_table dv snappy.ldb/000005.ldb
0x03a| 06 | . | shared_bytes: 6 0x3a2-0x3a3 (1)
0x03a| 0d | . | unshared_bytes: 13 0x3a3-0x3a4 (1)
0x03a| bd 03 | .. | value_length: 445 0x3a4-0x3a6 (2)
| | | internal_key{}: 0x3a6-0x3b3 (13)
| | | key{}: 0x3a6-0x3b3 (13)
0x03a| 6c 6f 72 65 6d | lorem | user_key: "lorem" 0x3a6-0x3ab (5)
0x03a| 01 | . | type: "value" (0x1) 0x3ab-0x3ac (1)
0x03a| 01 00 00 00| ....| sequence_number: 1 0x3ac-0x3b3 (7)
@ -43,7 +43,7 @@ $ fq -d leveldb_table dv snappy.ldb/000005.ldb
0x057|00 |. | shared_bytes: 0 0x570-0x571 (1)
0x057| 0b | . | unshared_bytes: 11 0x571-0x572 (1)
0x057| 76 | v | value_length: 118 0x572-0x573 (1)
| | | internal_key{}: 0x573-0x57e (11)
| | | key{}: 0x573-0x57e (11)
0x057| 72 6f 77 | row | user_key: "row" 0x573-0x576 (3)
0x057| 01 | . | type: "value" (0x1) 0x576-0x577 (1)
0x057| 04 00 00 00 00 00 00 | ....... | sequence_number: 4 0x577-0x57e (7)
@ -74,7 +74,7 @@ $ fq -d leveldb_table dv snappy.ldb/000005.ldb
0x00270| 00 | . | shared_bytes: 0 0x278-0x279 (1)
0x00270| 09 | . | unshared_bytes: 9 0x279-0x27a (1)
0x00270| 03 | . | value_length: 3 0x27a-0x27b (1)
| | | internal_key{}: 0x27b-0x284 (9)
| | | key{}: 0x27b-0x284 (9)
0x00270| 73 | s | user_key: "s" 0x27b-0x27c (1)
0x00270| 01 | . | type: "value" (0x1) 0x27c-0x27d (1)
0x00270| ff ff ff| ...| sequence_number: 72057594037927935 0x27d-0x284 (7)

View File

@ -8,7 +8,7 @@ $ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
0x000|00 |. | shared_bytes: 0 0x0-0x1 (1)
0x000| 13 | . | unshared_bytes: 19 0x1-0x2 (1)
0x000| bd 03 | .. | value_length: 445 0x2-0x4 (2)
| | | internal_key{}: 0x4-0x17 (19)
| | | key{}: 0x4-0x17 (19)
0x000| 6c 6f 72 65 6d 2e 64 6f 6c 6f 72 | lorem.dolor | user_key: "lorem.dolor" 0x4-0xf (11)
0x000| 01| .| type: "value" (0x1) 0xf-0x10 (1)
0x010|03 00 00 00 00 00 00 |....... | sequence_number: 3 0x10-0x17 (7)
@ -19,7 +19,7 @@ $ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
0x1d0| 06 | . | shared_bytes: 6 0x1d4-0x1d5 (1)
0x1d0| 0d | . | unshared_bytes: 13 0x1d5-0x1d6 (1)
0x1d0| bd 03 | .. | value_length: 445 0x1d6-0x1d8 (2)
| | | internal_key{}: 0x1d8-0x1e5 (13)
| | | key{}: 0x1d8-0x1e5 (13)
0x1d0| 69 70 73 75 6d | ipsum | user_key: "ipsum" 0x1d8-0x1dd (5)
0x1d0| 01 | . | type: "value" (0x1) 0x1dd-0x1de (1)
0x1d0| 02 00| ..| sequence_number: 2 0x1de-0x1e5 (7)
@ -31,7 +31,7 @@ $ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
0x3a0| 06 | . | shared_bytes: 6 0x3a2-0x3a3 (1)
0x3a0| 0d | . | unshared_bytes: 13 0x3a3-0x3a4 (1)
0x3a0| bd 03 | .. | value_length: 445 0x3a4-0x3a6 (2)
| | | internal_key{}: 0x3a6-0x3b3 (13)
| | | key{}: 0x3a6-0x3b3 (13)
0x3a0| 6c 6f 72 65 6d | lorem | user_key: "lorem" 0x3a6-0x3ab (5)
0x3a0| 01 | . | type: "value" (0x1) 0x3ab-0x3ac (1)
0x3a0| 01 00 00 00| ....| sequence_number: 1 0x3ac-0x3b3 (7)
@ -43,7 +43,7 @@ $ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
0x570|00 |. | shared_bytes: 0 0x570-0x571 (1)
0x570| 0b | . | unshared_bytes: 11 0x571-0x572 (1)
0x570| 76 | v | value_length: 118 0x572-0x573 (1)
| | | internal_key{}: 0x573-0x57e (11)
| | | key{}: 0x573-0x57e (11)
0x570| 72 6f 77 | row | user_key: "row" 0x573-0x576 (3)
0x570| 01 | . | type: "value" (0x1) 0x576-0x577 (1)
0x570| 04 00 00 00 00 00 00 | ....... | sequence_number: 4 0x577-0x57e (7)
@ -72,7 +72,7 @@ $ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
0x600| 00 | . | shared_bytes: 0 0x60e-0x60f (1)
0x600| 09| .| unshared_bytes: 9 0x60f-0x610 (1)
0x610|03 |. | value_length: 3 0x610-0x611 (1)
| | | internal_key{}: 0x611-0x61a (9)
| | | key{}: 0x611-0x61a (9)
0x610| 73 | s | user_key: "s" 0x611-0x612 (1)
0x610| 01 | . | type: "value" (0x1) 0x612-0x613 (1)
0x610| ff ff ff ff ff ff ff | ....... | sequence_number: 72057594037927935 0x613-0x61a (7)