
leveldb: add log and descriptor decoders

Michael B. 2023-12-06 19:24:49 +01:00
parent 78a3e94bd7
commit 2df0f0fbcf
25 changed files with 571 additions and 92 deletions

View File

@ -14,6 +14,9 @@ $ fq -n _registry.groups.probe
"gif",
"gzip",
"jpeg",
"leveldb_descriptor",
"leveldb_log",
"leveldb_table",
"luajit",
"macho",
"macho_fat",
@ -111,6 +114,9 @@ ipv6_packet Internet protocol v6 packet
jpeg Joint Photographic Experts Group file
json JavaScript Object Notation
jsonl JavaScript Object Notation Lines
leveldb_descriptor LevelDB Descriptor
leveldb_log LevelDB Log
leveldb_table LevelDB Table
luajit LuaJIT 2.0 bytecode
macho Mach-O macOS executable
macho_fat Fat Mach-O macOS executable (multi-architecture)

View File

@ -125,7 +125,9 @@ var (
JPEG = &decode.Group{Name: "jpeg"}
JSON = &decode.Group{Name: "json"}
JSONL = &decode.Group{Name: "jsonl"}
LDB = &decode.Group{Name: "leveldb_ldb"}
LevelDB_Descriptor = &decode.Group{Name: "leveldb_descriptor"}
LDB = &decode.Group{Name: "leveldb_table"}
LOG = &decode.Group{Name: "leveldb_log"}
LuaJIT = &decode.Group{Name: "luajit"}
MachO = &decode.Group{Name: "macho"}
MachO_Fat = &decode.Group{Name: "macho_fat"}

View File

@ -0,0 +1,125 @@
package leveldb
// https://github.com/google/leveldb/blob/main/doc/impl.md#manifest
// https://github.com/google/leveldb/blob/main/db/version_edit.cc
//
// Files in LevelDB using this format include:
// - MANIFEST-*
import (
"embed"
"github.com/wader/fq/format"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/interp"
"github.com/wader/fq/pkg/scalar"
)
//go:embed leveldb_descriptor.md
var leveldbDescriptorFS embed.FS
func init() {
interp.RegisterFormat(
format.LevelDB_Descriptor,
&decode.Format{
Description: "LevelDB Descriptor",
Groups: []*decode.Group{format.Probe},
DecodeFn: ldbDescriptorDecode,
})
interp.RegisterFS(leveldbDescriptorFS)
}
const (
tagTypeComparator = 1
tagTypeLogNumber = 2
tagTypeNextFileNumber = 3
tagTypeLastSequence = 4
tagTypeCompactPointer = 5
tagTypeDeletedFile = 6
tagTypeNewFile = 7
// 8 not used anymore
tagTypePrevLogNumber = 9
)
var tagTypes = scalar.UintMapSymStr{
tagTypeComparator: "comparator",
tagTypeLogNumber: "log_number",
tagTypeNextFileNumber: "next file number",
tagTypeLastSequence: "last sequence",
tagTypeCompactPointer: "compact pointer",
tagTypeDeletedFile: "deleted file",
tagTypeNewFile: "new file",
tagTypePrevLogNumber: "previous log number",
}
func ldbDescriptorDecode(d *decode.D) any {
rro := recordReadOptions{readDataFn: func(size int64, recordType int, d *decode.D) {
if recordType == recordTypeFull {
d.FieldStruct("data", func(d *decode.D) {
d.LimitedFn(size, readManifest)
})
} else {
d.FieldRawLen("data", size)
}
}}
readBlockSequence(rro, d)
return nil
}
// The manifest lists the sorted tables for each level, their key ranges, and other metadata.
func readManifest(d *decode.D) {
d.FieldArray("tags", func(d *decode.D) {
for {
if d.End() {
break
}
d.FieldStruct("tag", func(d *decode.D) {
tag := d.FieldULEB128("key", tagTypes)
switch tag {
case tagTypeComparator:
readLengthPrefixedString("value", d)
case tagTypeLogNumber,
tagTypePrevLogNumber,
tagTypeNextFileNumber,
tagTypeLastSequence:
d.FieldULEB128("value")
case tagTypeCompactPointer:
d.FieldStruct("value", func(d *decode.D) {
d.FieldULEB128("level")
readTagInternalKey("internal_key", d)
})
case tagTypeDeletedFile:
d.FieldStruct("value", func(d *decode.D) {
d.FieldULEB128("level")
d.FieldULEB128("file_number")
})
case tagTypeNewFile:
d.FieldStruct("value", func(d *decode.D) {
d.FieldULEB128("level")
d.FieldULEB128("file_number")
d.FieldULEB128("file_size")
readTagInternalKey("smallest_internal_key", d)
readTagInternalKey("largest_internal_key", d)
})
default:
d.Fatalf("unknown tag: %d", tag)
}
})
}
})
}
func readLengthPrefixedString(name string, d *decode.D) {
d.FieldStruct(name, func(d *decode.D) {
length := d.FieldULEB128("length")
d.FieldUTF8("data", int(length))
})
}
func readTagInternalKey(name string, d *decode.D) {
d.FieldStruct(name, func(d *decode.D) {
length := d.FieldULEB128("length")
readInternalKey("data", int64(length), d)
})
}
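
The tag stream that readManifest walks is just unsigned varints (LEB128) and length-prefixed strings. As an illustration only, not part of this commit, the simple numeric tags at the start of the second MANIFEST record in the test case further below can be decoded with Go's standard library alone; the sketch assumes the record payload is already reassembled and only handles tags whose value is a single varint:

package main

import (
    "encoding/binary"
    "fmt"
)

func main() {
    // Bytes taken from the MANIFEST-000004 test output below:
    // log_number=6, prev_log_number=0, next_file_number=7, last_sequence=4.
    payload := []byte{0x02, 0x06, 0x09, 0x00, 0x03, 0x07, 0x04, 0x04}
    for i := 0; i < len(payload); {
        tag, n := binary.Uvarint(payload[i:])
        i += n
        value, n := binary.Uvarint(payload[i:])
        i += n
        fmt.Printf("tag %d = %d\n", tag, value)
    }
}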

View File

@ -0,0 +1,13 @@
### Limitations
- fragmented non-"full" records are not decoded further.
### Authors
- [@mikez](https://github.com/mikez), original author
### References
- https://github.com/google/leveldb/blob/main/doc/impl.md#manifest
- https://github.com/google/leveldb/blob/main/db/version_edit.cc

View File

@ -0,0 +1,152 @@
package leveldb
// https://github.com/google/leveldb/blob/main/doc/log_format.md
//
// Files in LevelDB using this format include:
// - *.log
// - MANIFEST-*
import (
"embed"
"github.com/wader/fq/format"
"github.com/wader/fq/internal/mathex"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/interp"
"github.com/wader/fq/pkg/scalar"
)
//go:embed leveldb_log.md
var leveldbLogFS embed.FS
func init() {
interp.RegisterFormat(
format.LOG,
&decode.Format{
Description: "LevelDB Log",
Groups: []*decode.Group{format.Probe},
DecodeFn: ldbLogDecode,
})
interp.RegisterFS(leveldbLogFS)
}
type recordReadOptions struct {
// Both .log and MANIFEST files use the log format,
// i.e., a sequence of records split into 32KB blocks.
// However, the format of the data within the records differs.
// This function specifies how to read said data.
readDataFn func(size int64, recordType int, d *decode.D)
}
// https://github.com/google/leveldb/blob/main/db/log_format.h
const (
// checksum (4 bytes) + length (2 bytes) + record type (1 byte)
headerSize = (4 + 2 + 1) * 8
blockSize = (32 * 1024) * 8 // 32KB
recordTypeZero = 0 // preallocated file regions
recordTypeFull = 1
recordTypeFirst = 2 // fragments
recordTypeMiddle = 3
recordTypeLast = 4
)
var recordTypes = scalar.UintMapSymStr{
recordTypeZero: "zero",
recordTypeFull: "full",
recordTypeFirst: "first",
recordTypeMiddle: "middle",
recordTypeLast: "last",
}
func ldbLogDecode(d *decode.D) any {
rro := recordReadOptions{readDataFn: func(size int64, recordType int, d *decode.D) {
d.FieldRawLen("data", size)
}}
readBlockSequence(rro, d)
return nil
}
// Read a sequence of 32KB blocks (the last one may be smaller).
// https://github.com/google/leveldb/blob/main/db/log_reader.cc#L189
func readBlockSequence(rro recordReadOptions, d *decode.D) {
d.Endian = decode.LittleEndian
d.FieldArray("blocks", func(d *decode.D) {
for d.BitsLeft() >= headerSize {
d.LimitedFn(mathex.Min(blockSize, d.BitsLeft()), func(d *decode.D) {
d.FieldStruct("block", bind(readLogBlock, rro))
})
}
})
if d.BitsLeft() > 0 {
// The reference implementation says:
// "[...] if buffer_ is non-empty, we have a truncated header at the
// end of the file, which can be caused by the writer crashing in the
// middle of writing the header. Instead of considering this an error,
// just report EOF."
d.FieldRawLen("truncated_block", d.BitsLeft())
}
}
// Read a Log-block, consisting of up to 32KB of records and an optional trailer.
//
// block := record* trailer?
func readLogBlock(rro recordReadOptions, d *decode.D) {
if d.BitsLeft() > blockSize {
d.Fatalf("Bits left greater than maximum log-block size of 32KB.")
}
// record*
d.FieldArray("records", func(d *decode.D) {
for d.BitsLeft() >= headerSize {
d.FieldStruct("record", bind(readLogRecord, rro))
}
})
// trailer?
if d.BitsLeft() > 0 {
d.FieldRawLen("trailer", d.BitsLeft())
}
}
// Read a Log-record.
//
// checksum: uint32 // crc32c of type and data[] ; little-endian
// length: uint16 // little-endian
// type: uint8 // One of FULL, FIRST, MIDDLE, LAST
// data: uint8[length]
//
// via https://github.com/google/leveldb/blob/main/doc/log_format.md
func readLogRecord(rro recordReadOptions, d *decode.D) {
// header
var checksumValue *decode.Value
var length int64
var recordType int
d.LimitedFn(headerSize, func(d *decode.D) {
d.FieldStruct("header", func(d *decode.D) {
d.FieldU32("checksum", scalar.UintHex)
checksumValue = d.FieldGet("checksum")
length = int64(d.FieldU16("length"))
recordType = int(d.FieldU8("record_type", recordTypes))
})
})
// verify checksum: record type (1 byte) + data (`length` bytes)
d.RangeFn(d.Pos()-8, (1+length)*8, func(d *decode.D) {
bytesToCheck := d.Bits(int(d.BitsLeft()))
actualChecksum := computeChecksum(bytesToCheck)
_ = checksumValue.TryUintScalarFn(d.UintAssert(uint64(actualChecksum)))
})
// data
dataSize := length * 8
rro.readDataFn(dataSize, recordType, d)
}
func bind(f func(recordReadOptions, *decode.D), rro recordReadOptions) func(*decode.D) {
return func(d *decode.D) {
f(rro, d)
}
}
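
As a standalone cross-check of the header layout quoted above (checksum, length, type), here is a minimal sketch that is not part of the commit; it reads the first 7-byte record header of a .log file with Go's standard library, assuming the file starts with a complete header:

package main

import (
    "encoding/binary"
    "fmt"
    "os"
)

// readRecordHeader decodes one LevelDB log-record header:
// checksum (uint32 LE) + length (uint16 LE) + record type (uint8).
func readRecordHeader(b []byte) (checksum uint32, length uint16, recordType uint8) {
    checksum = binary.LittleEndian.Uint32(b[0:4])
    length = binary.LittleEndian.Uint16(b[4:6])
    recordType = b[6]
    return
}

func main() {
    data, err := os.ReadFile("log_only.ldb/000003.log")
    if err != nil {
        panic(err)
    }
    checksum, length, recordType := readRecordHeader(data[:7])
    fmt.Printf("checksum=%#x length=%d type=%d\n", checksum, length, recordType)
}

Run against the 000003.log test file further below, this should print the same checksum (0x61409318), length (472), and record type (1, "full") as the first record in the leveldb_log test output.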

View File

@ -0,0 +1,11 @@
### Limitations
- individual records are not merged and their data not decoded further.
### Authors
- [@mikez](https://github.com/mikez), original author
### References
- https://github.com/google/leveldb/blob/main/doc/log_format.md

View File

@ -3,10 +3,14 @@ package leveldb
// https://github.com/google/leveldb/blob/main/doc/table_format.md
// https://github.com/google/leveldb/blob/main/doc/impl.md
// https://github.com/google/leveldb/blob/main/doc/index.md
//
// Files in LevelDB using this format include:
// - *.ldb
import (
"bytes"
"embed"
"fmt"
"hash/crc32"
"github.com/golang/snappy"
@ -17,7 +21,7 @@ import (
"github.com/wader/fq/pkg/scalar"
)
//go:embed leveldb_ldb.md
//go:embed leveldb_table.md
var leveldbFS embed.FS
func init() {
@ -26,7 +30,7 @@ func init() {
&decode.Format{
Description: "LevelDB Table",
Groups: []*decode.Group{format.Probe},
DecodeFn: ldbDecode,
DecodeFn: ldbTableDecode,
})
interp.RegisterFS(leveldbFS)
}
@ -40,6 +44,8 @@ const (
// echo http://code.google.com/p/leveldb/ | sha1sum
// https://github.com/google/leveldb/blob/main/table/format.h#L76
tableMagicNumber = 0xdb4775248b80fb57
uint32Size = int64(32)
uint64Size = int64(64)
)
// https://github.com/google/leveldb/blob/main/include/leveldb/options.h#L25
@ -61,12 +67,12 @@ var valueTypes = scalar.UintMapSymStr{
0x1: "value",
}
type BlockHandle struct {
Offset uint64
Size uint64
type blockHandle struct {
offset uint64
size uint64
}
func ldbDecode(d *decode.D) any {
func ldbTableDecode(d *decode.D) any {
d.Endian = decode.LittleEndian
// footer
@ -95,13 +101,13 @@ func ldbDecode(d *decode.D) any {
// metaindex
d.SeekAbs(metaIndexOffset * 8)
var metaHandles []BlockHandle
readBlock("metaindex", metaIndexSize, readKeyValueContents, func(d *decode.D) {
// BlockHandle
var metaHandles []blockHandle
readTableBlock("metaindex", metaIndexSize, readKeyValueContents, func(d *decode.D) {
// blockHandle
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
handle := BlockHandle{
Offset: d.FieldULEB128("offset"),
Size: d.FieldULEB128("size"),
handle := blockHandle{
offset: d.FieldULEB128("offset"),
size: d.FieldULEB128("size"),
}
metaHandles = append(metaHandles, handle)
}, d)
@ -109,13 +115,13 @@ func ldbDecode(d *decode.D) any {
// index
d.SeekAbs(indexOffset * 8)
var dataHandles []BlockHandle
readBlock("index", indexSize, readKeyValueContents, func(d *decode.D) {
// BlockHandle
var dataHandles []blockHandle
readTableBlock("index", indexSize, readKeyValueContents, func(d *decode.D) {
// blockHandle
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
handle := BlockHandle{
Offset: d.FieldULEB128("offset"),
Size: d.FieldULEB128("size"),
handle := blockHandle{
offset: d.FieldULEB128("offset"),
size: d.FieldULEB128("size"),
}
dataHandles = append(dataHandles, handle)
}, d)
@ -125,8 +131,8 @@ func ldbDecode(d *decode.D) any {
if len(metaHandles) > 0 {
d.FieldArray("meta", func(d *decode.D) {
for _, handle := range metaHandles {
d.SeekAbs(int64(handle.Offset) * 8)
readBlock("meta_block", int64(handle.Size), readMetaContent, nil, d)
d.SeekAbs(int64(handle.offset) * 8)
readTableBlock("meta_block", int64(handle.size), readMetaContent, nil, d)
}
})
}
@ -136,8 +142,8 @@ func ldbDecode(d *decode.D) any {
if len(dataHandles) > 0 {
d.FieldArray("data", func(d *decode.D) {
for _, handle := range dataHandles {
d.SeekAbs(int64(handle.Offset) * 8)
readBlock("data_block", int64(handle.Size), readKeyValueContents, nil, d)
d.SeekAbs(int64(handle.offset) * 8)
readTableBlock("data_block", int64(handle.size), readKeyValueContents, nil, d)
}
})
}
@ -147,43 +153,46 @@ func ldbDecode(d *decode.D) any {
// Readers
// Read block contents as well as compression + crc bytes following it.
// The function `readBlockContents` gets the uncompressed bytebuffer.
// Read block contents as well as compression + checksum bytes following it.
// The function `readTableBlockContents` gets the uncompressed bytebuffer.
// https://github.com/google/leveldb/blob/main/table/format.cc#L69
func readBlock(name string, size int64, readBlockContents func(size int64, valueCallbackFn func(d *decode.D), d *decode.D), valueCallbackFn func(d *decode.D), d *decode.D) {
func readTableBlock(name string, size int64, readTableBlockContents func(size int64, valueCallbackFn func(d *decode.D), d *decode.D), valueCallbackFn func(d *decode.D), d *decode.D) {
d.FieldStruct(name, func(d *decode.D) {
start := d.Pos()
br := d.RawLen(size * 8)
// compression (1 byte)
compressionType := d.FieldU8("compression", compressionTypes, scalar.UintHex)
// crc (4 bytes)
// checksum (4 bytes)
data := d.ReadAllBits(br)
bytesToCheck := append(data, uint8(compressionType))
maskedCRCInt := maskedCrc32(bytesToCheck)
d.FieldU32("crc", d.UintAssert(uint64(maskedCRCInt)), scalar.UintHex)
checksum := computeChecksum(bytesToCheck)
d.FieldU32("checksum", d.UintAssert(uint64(checksum)), scalar.UintHex)
// decompress if needed
d.SeekAbs(start)
if compressionType == compressionTypeNone {
d.FieldStruct("uncompressed", func(d *decode.D) {
readBlockContents(size, valueCallbackFn, d)
readTableBlockContents(size, valueCallbackFn, d)
})
} else {
compressedSize := size
compressed := data
bb := &bytes.Buffer{}
switch compressionType {
case compressionTypeSnappy:
decompressed, err := snappy.Decode(nil, compressed)
if err != nil {
d.Fatalf("failed decompressing data: %v", err)
d.Errorf("failed decompressing data: %v", err)
}
d.Copy(bb, bytes.NewReader(decompressed))
default:
d.Fatalf("Unsupported compression type: %x", compressionType)
d.Errorf("Unsupported compression type: %x", compressionType)
}
if bb.Len() > 0 {
d.FieldStructRootBitBufFn("uncompressed", bitio.NewBitReader(bb.Bytes(), -1), func(d *decode.D) {
readTableBlockContents(int64(bb.Len()), valueCallbackFn, d)
})
}
d.FieldStructRootBitBufFn("uncompressed", bitio.NewBitReader(bb.Bytes(), -1), func(d *decode.D) {
readBlockContents(int64(bb.Len()), valueCallbackFn, d)
})
d.FieldRawLen("compressed", compressedSize*8)
}
@ -194,8 +203,6 @@ func readBlock(name string, size int64, readBlockContents func(size int64, value
// https://github.com/google/leveldb/blob/main/table/block_builder.cc#L16
// https://github.com/google/leveldb/blob/main/table/block.cc
func readKeyValueContents(size int64, valueCallbackFn func(d *decode.D), d *decode.D) {
uint32Size := int64(32)
uint64Size := int64(64)
start := d.Pos()
end := start + size*8
@ -223,13 +230,7 @@ func readKeyValueContents(size int64, valueCallbackFn func(d *decode.D), d *deco
d.FieldULEB128("shared_bytes")
unshared := int64(d.FieldULEB128("unshared_bytes"))
valueLength := d.FieldULEB128("value_length")
// InternalKey
// https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
d.FieldStruct("key_delta", func(d *decode.D) {
d.FieldUTF8("user_key", int(unshared-uint64Size/8))
d.FieldU8("type", valueTypes, scalar.UintHex)
d.FieldU56("sequence_number")
})
readInternalKey("internal_key", unshared, d)
if valueCallbackFn == nil {
d.FieldUTF8("value", int(valueLength))
} else {
@ -240,6 +241,16 @@ func readKeyValueContents(size int64, valueCallbackFn func(d *decode.D), d *deco
})
}
func readInternalKey(name string, byteSize int64, d *decode.D) {
// InternalKey
// https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
d.FieldStruct(name, func(d *decode.D) {
d.FieldUTF8("user_key", int(bitSize-uint64Size/8))
d.FieldU8("type", valueTypes, scalar.UintHex)
d.FieldU56("sequence_number")
})
}
// Read content encoded in the "filter" or "stats" Meta Block format.
// https://github.com/google/leveldb/blob/main/doc/table_format.md#filter-meta-block
// https://github.com/google/leveldb/blob/main/table/filter_block.cc
@ -250,13 +261,15 @@ func readMetaContent(size int64, valueCallbackFn func(d *decode.D), d *decode.D)
// Helpers
func maskedCrc32(bytes []uint8) uint32 {
// Compute the checksum: a CRC32C (as in RFC 3720) plus a custom mask.
// https://datatracker.ietf.org/doc/html/rfc3720#appendix-B.4
func computeChecksum(bytes []uint8) uint32 {
crc32C := crc32.New(crc32.MakeTable(crc32.Castagnoli))
crc32C.Write(bytes)
return mask(crc32C.Sum32())
}
// Return a masked representation of a CRC.
// Return a masked representation of the CRC.
// https://github.com/google/leveldb/blob/main/util/crc32c.h#L29
func mask(crc uint32) uint32 {
const kMaskDelta = 0xa282ead8
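
For reference, LevelDB's util/crc32c.h masks the CRC by rotating it right by 15 bits and adding kMaskDelta. A minimal sketch, not part of this commit, of mask/unmask and of the block-trailer check that computeChecksum above feeds into (the payload here is made up for illustration):

package main

import (
    "fmt"
    "hash/crc32"
)

const kMaskDelta = 0xa282ead8

// mask rotates the CRC right by 15 bits and adds a constant (LevelDB util/crc32c.h).
func mask(crc uint32) uint32 {
    return ((crc >> 15) | (crc << 17)) + kMaskDelta
}

// unmask reverses mask.
func unmask(masked uint32) uint32 {
    rot := masked - kMaskDelta
    return (rot >> 17) | (rot << 15)
}

func main() {
    // A table-block checksum covers the block data followed by the
    // 1-byte compression type; the stored value is the masked CRC32C.
    payload := append([]byte("example block data"), 0x00) // 0x00 = compression: none
    crc := crc32.Checksum(payload, crc32.MakeTable(crc32.Castagnoli))
    fmt.Printf("masked=%#x round-trip ok=%v\n", mask(crc), unmask(mask(crc)) == crc)
}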

View File

@ -0,0 +1,11 @@
$ fq -h leveldb_descriptor
leveldb_descriptor: LevelDB Descriptor decoder
Decode examples
===============
# Decode file as leveldb_descriptor
$ fq -d leveldb_descriptor . file
# Decode value as leveldb_descriptor
... | leveldb_descriptor

View File

@ -0,0 +1,43 @@
$ fq -h leveldb_log
leveldb_log: LevelDB Log decoder
Decode examples
===============
# Decode file as leveldb_log
$ fq -d leveldb_log . file
# Decode value as leveldb_log
... | leveldb_log
Limitations
===========
- individual records are not merged and their data not decoded further.
Authors
=======
- @mikez (https://github.com/mikez), original author
References
==========
- https://github.com/google/leveldb/blob/main/doc/log_format.md

View File

@ -1,13 +1,13 @@
$ fq -h leveldb_ldb
leveldb_ldb: LevelDB Table decoder
$ fq -h leveldb_table
leveldb_table: LevelDB Table decoder
Decode examples
===============
# Decode file as leveldb_ldb
$ fq -d leveldb_ldb . file
# Decode value as leveldb_ldb
... | leveldb_ldb
# Decode file as leveldb_table
$ fq -d leveldb_table . file
# Decode value as leveldb_table
... | leveldb_table
Limitations
===========

View File

@ -0,0 +1,40 @@
$ fq -d leveldb_log dv log_only.ldb/000003.log
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: log_only.ldb/000003.log (leveldb_log) 0x0-0x62c (1580)
| | | blocks[0:1]: 0x0-0x62c (1580)
| | | [0]{}: block 0x0-0x62c (1580)
| | | records[0:4]: 0x0-0x62c (1580)
| | | [0]{}: record 0x0-0x1df (479)
| | | header{}: 0x0-0x7 (7)
0x000|18 93 40 61 |..@a | checksum: 0x61409318 (valid) 0x0-0x4 (4)
0x000| d8 01 | .. | length: 472 0x4-0x6 (2)
0x000| 01 | . | record_type: "full" (1) 0x6-0x7 (1)
0x000| 01 00 00 00 00 00 00 00 01| .........| data: raw bits 0x7-0x1df (472)
0x010|00 00 00 01 0b 6c 6f 72 65 6d 2e 6c 6f 72 65 6d|.....lorem.lorem|
* |until 0x1de.7 (472) | |
| | | [1]{}: record 0x1df-0x3be (479)
| | | header{}: 0x1df-0x1e6 (7)
0x1d0| 5a| Z| checksum: 0x12ba655a (valid) 0x1df-0x1e3 (4)
0x1e0|65 ba 12 |e.. |
0x1e0| d8 01 | .. | length: 472 0x1e3-0x1e5 (2)
0x1e0| 01 | . | record_type: "full" (1) 0x1e5-0x1e6 (1)
0x1e0| 02 00 00 00 00 00 00 00 01 00| ..........| data: raw bits 0x1e6-0x3be (472)
0x1f0|00 00 01 0b 6c 6f 72 65 6d 2e 69 70 73 75 6d bd|....lorem.ipsum.|
* |until 0x3bd.7 (472) | |
| | | [2]{}: record 0x3be-0x59d (479)
| | | header{}: 0x3be-0x3c5 (7)
0x3b0| 09 81| ..| checksum: 0x890d8109 (valid) 0x3be-0x3c2 (4)
0x3c0|0d 89 |.. |
0x3c0| d8 01 | .. | length: 472 0x3c2-0x3c4 (2)
0x3c0| 01 | . | record_type: "full" (1) 0x3c4-0x3c5 (1)
0x3c0| 03 00 00 00 00 00 00 00 01 00 00| ...........| data: raw bits 0x3c5-0x59d (472)
0x3d0|00 01 0b 6c 6f 72 65 6d 2e 64 6f 6c 6f 72 bd 03|...lorem.dolor..|
* |until 0x59c.7 (472) | |
| | | [3]{}: record 0x59d-0x62c (143)
| | | header{}: 0x59d-0x5a4 (7)
0x590| a0 86 3e| ..>| checksum: 0xb3e86a0 (valid) 0x59d-0x5a1 (4)
0x5a0|0b |. |
0x5a0| 88 00 | .. | length: 136 0x5a1-0x5a3 (2)
0x5a0| 01 | . | record_type: "full" (1) 0x5a3-0x5a4 (1)
0x5a0| 04 00 00 00 00 00 00 00 01 00 00 00| ............| data: raw bits 0x5a4-0x62c (136)
0x5b0|01 03 72 6f 77 76 52 6f 77 2c 20 72 6f 77 2c 20|..rowvRow, row, |
* |until 0x62b.7 (end) (136) | |

View File

@ -0,0 +1,57 @@
$ fq -d leveldb_descriptor dv uncompressed.ldb/MANIFEST-000004
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: uncompressed.ldb/MANIFEST-000004 (leveldb_descriptor) 0x0-0x57 (87)
| | | blocks[0:1]: 0x0-0x57 (87)
| | | [0]{}: block 0x0-0x57 (87)
| | | records[0:2]: 0x0-0x57 (87)
| | | [0]{}: record 0x0-0x23 (35)
| | | header{}: 0x0-0x7 (7)
0x00|56 f9 b8 f8 |V... | checksum: 0xf8b8f956 (valid) 0x0-0x4 (4)
0x00| 1c 00 | .. | length: 28 0x4-0x6 (2)
0x00| 01 | . | record_type: "full" (1) 0x6-0x7 (1)
| | | data{}: 0x7-0x23 (28)
| | | tags[0:1]: 0x7-0x23 (28)
| | | [0]{}: tag 0x7-0x23 (28)
0x00| 01 | . | key: "comparator" (1) 0x7-0x8 (1)
| | | value{}: 0x8-0x23 (27)
0x00| 1a | . | length: 26 0x8-0x9 (1)
0x00| 6c 65 76 65 6c 64 62| leveldb| data: "leveldb.BytewiseComparator" 0x9-0x23 (26)
0x10|2e 42 79 74 65 77 69 73 65 43 6f 6d 70 61 72 61|.BytewiseCompara|
0x20|74 6f 72 |tor |
| | | [1]{}: record 0x23-0x57 (52)
| | | header{}: 0x23-0x2a (7)
0x20| d2 53 ab 33 | .S.3 | checksum: 0x33ab53d2 (valid) 0x23-0x27 (4)
0x20| 2d 00 | -. | length: 45 0x27-0x29 (2)
0x20| 01 | . | record_type: "full" (1) 0x29-0x2a (1)
| | | data{}: 0x2a-0x57 (45)
| | | tags[0:5]: 0x2a-0x57 (45)
| | | [0]{}: tag 0x2a-0x2c (2)
0x20| 02 | . | key: "log_number" (2) 0x2a-0x2b (1)
0x20| 06 | . | value: 6 0x2b-0x2c (1)
| | | [1]{}: tag 0x2c-0x2e (2)
0x20| 09 | . | key: "previous log number" (9) 0x2c-0x2d (1)
0x20| 00 | . | value: 0 0x2d-0x2e (1)
| | | [2]{}: tag 0x2e-0x30 (2)
0x20| 03 | . | key: "next file number" (3) 0x2e-0x2f (1)
0x20| 07| .| value: 7 0x2f-0x30 (1)
| | | [3]{}: tag 0x30-0x32 (2)
0x30|04 |. | key: "last sequence" (4) 0x30-0x31 (1)
0x30| 04 | . | value: 4 0x31-0x32 (1)
| | | [4]{}: tag 0x32-0x57 (37)
0x30| 07 | . | key: "new file" (7) 0x32-0x33 (1)
| | | value{}: 0x33-0x57 (36)
0x30| 00 | . | level: 0 0x33-0x34 (1)
0x30| 05 | . | file_number: 5 0x34-0x35 (1)
0x30| da 0c | .. | file_size: 1626 0x35-0x37 (2)
| | | smallest_internal_key{}: 0x37-0x4b (20)
0x30| 13 | . | length: 19 0x37-0x38 (1)
| | | data{}: 0x38-0x4b (19)
0x30| 6c 6f 72 65 6d 2e 64 6f| lorem.do| user_key: "lorem.dolor" 0x38-0x43 (11)
0x40|6c 6f 72 |lor |
0x40| 01 | . | type: "value" (0x1) 0x43-0x44 (1)
0x40| 03 00 00 00 00 00 00 | ....... | sequence_number: 3 0x44-0x4b (7)
| | | largest_internal_key{}: 0x4b-0x57 (12)
0x40| 0b | . | length: 11 0x4b-0x4c (1)
| | | data{}: 0x4c-0x57 (11)
0x40| 72 6f 77 | row | user_key: "row" 0x4c-0x4f (3)
0x40| 01| .| type: "value" (0x1) 0x4f-0x50 (1)
0x50|04 00 00 00 00 00 00| |.......| | sequence_number: 4 0x50-0x57 (7)
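
Once decoded, the same manifest tags can be extracted as plain values with a jq-style query; one possible invocation against this test file (output omitted):

$ fq -d leveldb_descriptor '.blocks[0].records[1].data.tags' uncompressed.ldb/MANIFEST-000004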

View File

@ -1,5 +1,5 @@
$ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: snappy.ldb/000005.ldb (leveldb_ldb) 0x0-0x2c4 (708)
$ fq -d leveldb_table dv snappy.ldb/000005.ldb
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: snappy.ldb/000005.ldb (leveldb_table) 0x0-0x2c4 (708)
| | | data[0:1]: 0x0-0x26b (619)
| | | [0]{}: data_block 0x0-0x26b (619)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| uncompressed{}: 0x0-0x5fc (1532)
@ -8,7 +8,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x000|00 |. | shared_bytes: 0 0x0-0x1 (1)
0x000| 13 | . | unshared_bytes: 19 0x1-0x2 (1)
0x000| bd 03 | .. | value_length: 445 0x2-0x4 (2)
| | | key_delta{}: 0x4-0x17 (19)
| | | internal_key{}: 0x4-0x17 (19)
0x000| 6c 6f 72 65 6d 2e 64 6f 6c 6f 72 | lorem.dolor | user_key: "lorem.dolor" 0x4-0xf (11)
0x000| 01| .| type: "value" (0x1) 0xf-0x10 (1)
0x001|03 00 00 00 00 00 00 |....... | sequence_number: 3 0x10-0x17 (7)
@ -19,7 +19,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x01d| 06 | . | shared_bytes: 6 0x1d4-0x1d5 (1)
0x01d| 0d | . | unshared_bytes: 13 0x1d5-0x1d6 (1)
0x01d| bd 03 | .. | value_length: 445 0x1d6-0x1d8 (2)
| | | key_delta{}: 0x1d8-0x1e5 (13)
| | | internal_key{}: 0x1d8-0x1e5 (13)
0x01d| 69 70 73 75 6d | ipsum | user_key: "ipsum" 0x1d8-0x1dd (5)
0x01d| 01 | . | type: "value" (0x1) 0x1dd-0x1de (1)
0x01d| 02 00| ..| sequence_number: 2 0x1de-0x1e5 (7)
@ -31,7 +31,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x03a| 06 | . | shared_bytes: 6 0x3a2-0x3a3 (1)
0x03a| 0d | . | unshared_bytes: 13 0x3a3-0x3a4 (1)
0x03a| bd 03 | .. | value_length: 445 0x3a4-0x3a6 (2)
| | | key_delta{}: 0x3a6-0x3b3 (13)
| | | internal_key{}: 0x3a6-0x3b3 (13)
0x03a| 6c 6f 72 65 6d | lorem | user_key: "lorem" 0x3a6-0x3ab (5)
0x03a| 01 | . | type: "value" (0x1) 0x3ab-0x3ac (1)
0x03a| 01 00 00 00| ....| sequence_number: 1 0x3ac-0x3b3 (7)
@ -43,7 +43,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x057|00 |. | shared_bytes: 0 0x570-0x571 (1)
0x057| 0b | . | unshared_bytes: 11 0x571-0x572 (1)
0x057| 76 | v | value_length: 118 0x572-0x573 (1)
| | | key_delta{}: 0x573-0x57e (11)
| | | internal_key{}: 0x573-0x57e (11)
0x057| 72 6f 77 | row | user_key: "row" 0x573-0x576 (3)
0x057| 01 | . | type: "value" (0x1) 0x576-0x577 (1)
0x057| 04 00 00 00 00 00 00 | ....... | sequence_number: 4 0x577-0x57e (7)
@ -57,7 +57,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x00000|fc 0b 44 00 13 bd 03 6c 6f 72 65 6d 2e 64 6f 6c|..D....lorem.dol| compressed: raw bits 0x0-0x266 (614)
* |until 0x265.7 (614) | |
0x00260| 01 | . | compression: "snappy" (0x1) 0x266-0x267 (1)
0x00260| b6 9d 28 0d | ..(. | crc: 0xd289db6 (valid) 0x267-0x26b (4)
0x00260| b6 9d 28 0d | ..(. | checksum: 0xd289db6 (valid) 0x267-0x26b (4)
| | | metaindex{}: 0x26b-0x278 (13)
| | | uncompressed{}: 0x26b-0x273 (8)
| | | trailer{}: 0x26b-0x273 (8)
@ -66,7 +66,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x00260| 01| .| num_restarts: 1 0x26f-0x273 (4)
0x00270|00 00 00 |... |
0x00270| 00 | . | compression: "none" (0x0) 0x273-0x274 (1)
0x00270| c0 f2 a1 b0 | .... | crc: 0xb0a1f2c0 (valid) 0x274-0x278 (4)
0x00270| c0 f2 a1 b0 | .... | checksum: 0xb0a1f2c0 (valid) 0x274-0x278 (4)
| | | index{}: 0x278-0x294 (28)
| | | uncompressed{}: 0x278-0x28f (23)
| | | entries[0:1]: 0x278-0x287 (15)
@ -74,7 +74,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x00270| 00 | . | shared_bytes: 0 0x278-0x279 (1)
0x00270| 09 | . | unshared_bytes: 9 0x279-0x27a (1)
0x00270| 03 | . | value_length: 3 0x27a-0x27b (1)
| | | key_delta{}: 0x27b-0x284 (9)
| | | internal_key{}: 0x27b-0x284 (9)
0x00270| 73 | s | user_key: "s" 0x27b-0x27c (1)
0x00270| 01 | . | type: "value" (0x1) 0x27c-0x27d (1)
0x00270| ff ff ff| ...| sequence_number: 72057594037927935 0x27d-0x284 (7)
@ -87,7 +87,7 @@ $ fq -d leveldb_ldb dv snappy.ldb/000005.ldb
0x00280| 00 00 00 00 | .... | [0]: 0 restart 0x287-0x28b (4)
0x00280| 01 00 00 00 | .... | num_restarts: 1 0x28b-0x28f (4)
0x00280| 00| .| compression: "none" (0x0) 0x28f-0x290 (1)
0x00290|68 24 42 91 |h$B. | crc: 0x91422468 (valid) 0x290-0x294 (4)
0x00290|68 24 42 91 |h$B. | checksum: 0x91422468 (valid) 0x290-0x294 (4)
| | | footer{}: 0x294-0x2c4 (48)
| | | metaindex_handle{}: 0x294-0x297 (3)
0x00290| eb 04 | .. | offset: 619 0x294-0x296 (2)

View File

@ -1,5 +1,5 @@
$ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: uncompressed.ldb/000005.ldb (leveldb_ldb) 0x0-0x65a (1626)
$ fq -d leveldb_table dv uncompressed.ldb/000005.ldb
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.{}: uncompressed.ldb/000005.ldb (leveldb_table) 0x0-0x65a (1626)
| | | data[0:1]: 0x0-0x601 (1537)
| | | [0]{}: data_block 0x0-0x601 (1537)
| | | uncompressed{}: 0x0-0x5fc (1532)
@ -8,7 +8,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x000|00 |. | shared_bytes: 0 0x0-0x1 (1)
0x000| 13 | . | unshared_bytes: 19 0x1-0x2 (1)
0x000| bd 03 | .. | value_length: 445 0x2-0x4 (2)
| | | key_delta{}: 0x4-0x17 (19)
| | | internal_key{}: 0x4-0x17 (19)
0x000| 6c 6f 72 65 6d 2e 64 6f 6c 6f 72 | lorem.dolor | user_key: "lorem.dolor" 0x4-0xf (11)
0x000| 01| .| type: "value" (0x1) 0xf-0x10 (1)
0x010|03 00 00 00 00 00 00 |....... | sequence_number: 3 0x10-0x17 (7)
@ -19,7 +19,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x1d0| 06 | . | shared_bytes: 6 0x1d4-0x1d5 (1)
0x1d0| 0d | . | unshared_bytes: 13 0x1d5-0x1d6 (1)
0x1d0| bd 03 | .. | value_length: 445 0x1d6-0x1d8 (2)
| | | key_delta{}: 0x1d8-0x1e5 (13)
| | | internal_key{}: 0x1d8-0x1e5 (13)
0x1d0| 69 70 73 75 6d | ipsum | user_key: "ipsum" 0x1d8-0x1dd (5)
0x1d0| 01 | . | type: "value" (0x1) 0x1dd-0x1de (1)
0x1d0| 02 00| ..| sequence_number: 2 0x1de-0x1e5 (7)
@ -31,7 +31,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x3a0| 06 | . | shared_bytes: 6 0x3a2-0x3a3 (1)
0x3a0| 0d | . | unshared_bytes: 13 0x3a3-0x3a4 (1)
0x3a0| bd 03 | .. | value_length: 445 0x3a4-0x3a6 (2)
| | | key_delta{}: 0x3a6-0x3b3 (13)
| | | internal_key{}: 0x3a6-0x3b3 (13)
0x3a0| 6c 6f 72 65 6d | lorem | user_key: "lorem" 0x3a6-0x3ab (5)
0x3a0| 01 | . | type: "value" (0x1) 0x3ab-0x3ac (1)
0x3a0| 01 00 00 00| ....| sequence_number: 1 0x3ac-0x3b3 (7)
@ -43,7 +43,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x570|00 |. | shared_bytes: 0 0x570-0x571 (1)
0x570| 0b | . | unshared_bytes: 11 0x571-0x572 (1)
0x570| 76 | v | value_length: 118 0x572-0x573 (1)
| | | key_delta{}: 0x573-0x57e (11)
| | | internal_key{}: 0x573-0x57e (11)
0x570| 72 6f 77 | row | user_key: "row" 0x573-0x576 (3)
0x570| 01 | . | type: "value" (0x1) 0x576-0x577 (1)
0x570| 04 00 00 00 00 00 00 | ....... | sequence_number: 4 0x577-0x57e (7)
@ -55,7 +55,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x5f0| 00 00 00 00 | .... | [0]: 0 restart 0x5f4-0x5f8 (4)
0x5f0| 01 00 00 00 | .... | num_restarts: 1 0x5f8-0x5fc (4)
0x5f0| 00 | . | compression: "none" (0x0) 0x5fc-0x5fd (1)
0x5f0| 6f 99 1d| o..| crc: 0xb31d996f (valid) 0x5fd-0x601 (4)
0x5f0| 6f 99 1d| o..| checksum: 0xb31d996f (valid) 0x5fd-0x601 (4)
0x600|b3 |. |
| | | metaindex{}: 0x601-0x60e (13)
| | | uncompressed{}: 0x601-0x609 (8)
@ -64,7 +64,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x600| 00 00 00 00 | .... | [0]: 0 restart 0x601-0x605 (4)
0x600| 01 00 00 00 | .... | num_restarts: 1 0x605-0x609 (4)
0x600| 00 | . | compression: "none" (0x0) 0x609-0x60a (1)
0x600| c0 f2 a1 b0 | .... | crc: 0xb0a1f2c0 (valid) 0x60a-0x60e (4)
0x600| c0 f2 a1 b0 | .... | checksum: 0xb0a1f2c0 (valid) 0x60a-0x60e (4)
| | | index{}: 0x60e-0x62a (28)
| | | uncompressed{}: 0x60e-0x625 (23)
| | | entries[0:1]: 0x60e-0x61d (15)
@ -72,7 +72,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x600| 00 | . | shared_bytes: 0 0x60e-0x60f (1)
0x600| 09| .| unshared_bytes: 9 0x60f-0x610 (1)
0x610|03 |. | value_length: 3 0x610-0x611 (1)
| | | key_delta{}: 0x611-0x61a (9)
| | | internal_key{}: 0x611-0x61a (9)
0x610| 73 | s | user_key: "s" 0x611-0x612 (1)
0x610| 01 | . | type: "value" (0x1) 0x612-0x613 (1)
0x610| ff ff ff ff ff ff ff | ....... | sequence_number: 72057594037927935 0x613-0x61a (7)
@ -85,7 +85,7 @@ $ fq -d leveldb_ldb dv uncompressed.ldb/000005.ldb
0x620|00 |. |
0x620| 01 00 00 00 | .... | num_restarts: 1 0x621-0x625 (4)
0x620| 00 | . | compression: "none" (0x0) 0x625-0x626 (1)
0x620| 68 e2 bf 46 | h..F | crc: 0x46bfe268 (valid) 0x626-0x62a (4)
0x620| 68 e2 bf 46 | h..F | checksum: 0x46bfe268 (valid) 0x626-0x62a (4)
| | | footer{}: 0x62a-0x65a (48)
| | | metaindex_handle{}: 0x62a-0x62d (3)
0x620| 81 0c | .. | offset: 1537 0x62a-0x62c (2)

Binary file not shown.

View File

@ -0,0 +1 @@
MANIFEST-000002

View File

View File

@ -0,0 +1,2 @@
2023/12/05-20:38:42.412901 0x107ac5e00 Creating DB ./log_only.ldb since it was missing.
2023/12/05-20:38:42.512812 0x107ac5e00 Delete type=3 #1

Binary file not shown.

View File

@ -9,11 +9,12 @@ import snappy # pip install python-snappy
def main():
make("./lorem.json", "./uncompressed.ldb", compression=None)
make("./lorem.json", "./snappy.ldb", compression="snappy")
make("./lorem.json", "./uncompressed.ldb", reopen=True)
make("./lorem.json", "./snappy.ldb", compression="snappy", reopen=True)
make("./lorem.json", "./log_only.ldb", compression=None)
def make(input_filepath, output_filepath, compression):
def make(input_filepath, output_filepath, compression=None, reopen=False):
if os.path.exists(output_filepath):
raise FileExistsError(f"The file {output_filepath} already exists.")
# make a .ldb file and a .log file within
@ -21,9 +22,11 @@ def make(input_filepath, output_filepath, compression):
for key, value in read_json(input_filepath).items():
db.put(key.encode(), value.encode())
db.close()
# reopen, so a .ldb file is generated within the .ldb directory
db = plyvel.DB(output_filepath, compression=compression)
db.close()
if reopen:
# reopen, so a .ldb file is generated within the .ldb directory;
# otherwise there is only a .log file with the fresh changes.
db = plyvel.DB(output_filepath, compression=compression)
db.close()
# Helpers

View File

@ -1,5 +1,5 @@
2023/12/04-16:40:03.362678 0x113e92e00 Recovering log #3
2023/12/04-16:40:03.362792 0x113e92e00 Level-0 table #5: started
2023/12/04-16:40:03.379293 0x113e92e00 Level-0 table #5: 708 bytes OK
2023/12/04-16:40:03.429992 0x113e92e00 Delete type=0 #3
2023/12/04-16:40:03.430055 0x113e92e00 Delete type=3 #2
2023/12/05-20:38:42.349463 0x107ac5e00 Recovering log #3
2023/12/05-20:38:42.350562 0x107ac5e00 Level-0 table #5: started
2023/12/05-20:38:42.359725 0x107ac5e00 Level-0 table #5: 708 bytes OK
2023/12/05-20:38:42.411415 0x107ac5e00 Delete type=0 #3
2023/12/05-20:38:42.411476 0x107ac5e00 Delete type=3 #2

View File

@ -1,2 +1,2 @@
2023/12/04-16:40:03.272944 0x113e92e00 Creating DB ./snappy.ldb since it was missing.
2023/12/04-16:40:03.360198 0x113e92e00 Delete type=3 #1
2023/12/05-20:38:42.215352 0x107ac5e00 Creating DB ./snappy.ldb since it was missing.
2023/12/05-20:38:42.294790 0x107ac5e00 Delete type=3 #1

View File

@ -1,5 +1,5 @@
2023/12/04-16:40:03.212862 0x113e92e00 Recovering log #3
2023/12/04-16:40:03.212973 0x113e92e00 Level-0 table #5: started
2023/12/04-16:40:03.218905 0x113e92e00 Level-0 table #5: 1626 bytes OK
2023/12/04-16:40:03.271281 0x113e92e00 Delete type=0 #3
2023/12/04-16:40:03.271740 0x113e92e00 Delete type=3 #2
2023/12/05-20:38:41.945807 0x107ac5e00 Recovering log #3
2023/12/05-20:38:41.945944 0x107ac5e00 Level-0 table #5: started
2023/12/05-20:38:41.953310 0x107ac5e00 Level-0 table #5: 1626 bytes OK
2023/12/05-20:38:42.214073 0x107ac5e00 Delete type=0 #3
2023/12/05-20:38:42.214131 0x107ac5e00 Delete type=3 #2

View File

@ -1,2 +1,2 @@
2023/12/04-16:40:03.124613 0x113e92e00 Creating DB ./uncompressed.ldb since it was missing.
2023/12/04-16:40:03.210513 0x113e92e00 Delete type=3 #1
2023/12/05-20:38:41.773771 0x107ac5e00 Creating DB ./uncompressed.ldb since it was missing.
2023/12/05-20:38:41.910278 0x107ac5e00 Delete type=3 #1