sapling/linelog/linelog.c
Jun Wu 6ec93f77f1 linelog: implement linelog_replacelines
Summary: This is the core API to do writes.

Test Plan: `gcc -Wall -Wextra -Wconversion -c linelog.c`

Reviewers: #mercurial, ttung, simonfar

Reviewed By: simonfar

Subscribers: simonfar, mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D3641637

Tasks: 12416202

Signature: t1:3641637:1470335236:758df55835ac6c42212783d4cc12302588ce2216
2016-07-29 13:43:55 +01:00

352 lines
11 KiB
C

/*
* Copyright 2016-present Facebook. All Rights Reserved.
*
* linelog.c: data structure tracking line changes
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2 or any later version.
*/
#include "linelog.h"
#include <assert.h> /* assert */
#include <stdbool.h> /* bool */
#include <stdlib.h> /* realloc, free */
#include <string.h> /* NULL, memcpy, memmove, memset */
#include <arpa/inet.h> /* htonl, ntohl */
/* linelog_buf.data is a plain array of instructions.
a linelog instruction has 8 bytes:
opcode: 2 bits (linelog_opcode)
operand1: 30 bits (linelog_revnum)
operand2: 32 bits (linelog_offset | linelog_linenum)
the first 8-byte slot is not a real instruction: its first 32-bit word
stores maxrev (in the operand1 position) and its second 32-bit word stores
the instruction count, which indicates the used buffer size. it can be
parsed as a normal instruction to extract the information. the code usually
uses "inst0" as the variable name for that purpose.
real instructions start from the 9th byte. */
/* opcodes understood by the interpreter in linelog_annotate.
   the opcode occupies the low 2 bits of the first decoded 32-bit word (see
   decode/encode). the value 3 is not assigned and is rejected as
   LINELOG_RESULT_EILLDATA by linelog_annotate. a conditional jump whose
   target is offset 0 acts as the END marker. */
typedef enum {
JGE = 0, /* if rev >= operand1, jump to operand2 */
JL = 1, /* if rev < operand1, jump to operand2 */
LINE = 2, /* line introduced by rev = operand1, linenum = operand2 */
} linelog_opcode;
/* decoded (in-memory) form of one instruction.
   the anonymous unions give each operand a raw name plus names matching how
   the opcodes interpret it. encode keeps only the low 30 bits of operand1
   (it is shifted left by 2 inside a 32-bit word). */
typedef struct {
linelog_opcode opcode;
union {
uint32_t operand1; /* raw view */
linelog_revnum rev; /* revision compared against (JGE/JL) or introducing a line (LINE) */
};
union {
uint32_t operand2; /* raw view */
linelog_linenum linenum; /* LINE: line number */
linelog_offset offset; /* JGE/JL: jump target; inst0: instruction count */
};
} linelog_inst;
/* size of the encoded representation, not sizeof(linelog_inst) */
#define INST_SIZE 8
/* like linelog_{offset,linenum} but less likely to overflow */
typedef size_t linelog_loffset;
typedef size_t linelog_llinenum;
/* hard limits, smaller than the physical limits to reserve some bits */
/* note: MIN evaluates its arguments more than once; it is only applied to
   constants below */
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
/* exclusive upper bound for instruction offsets/counts. also capped by
   SIZE_MAX / INST_SIZE so that `offset * INST_SIZE` cannot wrap size_t */
static const linelog_loffset MAX_OFFSET =
MIN(0x0ffffff0u, SIZE_MAX / INST_SIZE);
/* exclusive upper bound for line counts. also capped so that
   `sizeof(linelog_lineinfo) * linecount` cannot wrap size_t */
static const linelog_llinenum MAX_LINENUM =
MIN(0x1ffffff0u, SIZE_MAX / sizeof(linelog_lineinfo));
/* exclusive upper bound for revision numbers accepted by replacelines */
static const linelog_revnum MAX_REVNUM = 0x1ffffff0u;
/* uint8_t[8] -> linelog_inst
   the 8 bytes are two big-endian 32-bit words. the first word carries the
   opcode in its low 2 bits and operand1 in the remaining 30 bits; the second
   word is operand2. */
static inline void decode(const uint8_t data[INST_SIZE], linelog_inst *inst) {
  uint32_t words[2];
  /* copy instead of casting: data may not be 4-byte aligned */
  memcpy(words, data, sizeof(words));
  const uint32_t head = ntohl(words[0]);
  const uint32_t tail = ntohl(words[1]);
  inst->opcode = (linelog_opcode)(head & 3);
  inst->operand1 = (head >> 2) & 0x3fffffffu;
  inst->operand2 = tail;
}
/* uint8_t[8] <- linelog_inst
   inverse of decode: packs opcode (low 2 bits) and operand1 (shifted left by
   2) into the first word, operand2 into the second, both stored big-endian. */
static inline void encode(uint8_t data[INST_SIZE], const linelog_inst *inst) {
  const uint32_t head = (uint32_t)(inst->opcode) | (inst->operand1 << 2);
  const uint32_t words[2] = { htonl(head), htonl(inst->operand2) };
  memcpy(data, words, sizeof(words));
}
/* read instruction, with error checks.
   decodes the instruction stored at instruction index `offset` into `inst`.
   returns LINELOG_RESULT_EILLDATA if buf is NULL, has no data, is smaller
   than one instruction, if `offset` is not below MAX_OFFSET, if the stored
   instruction count does not fit in buf->size, or if `offset` is not below
   the stored instruction count. returns LINELOG_RESULT_OK otherwise. */
static inline linelog_result readinst(const linelog_buf *buf,
    linelog_inst *inst, linelog_loffset offset) {
  if (buf == NULL || buf->data == NULL || buf->size < INST_SIZE
      || offset >= MAX_OFFSET)
    return LINELOG_RESULT_EILLDATA;
  /* the instruction count is the second big-endian 32-bit word of slot 0.
     copy it out with memcpy instead of dereferencing a casted pointer: the
     byte buffer is not guaranteed to be 4-byte aligned, and this matches
     how decode reads the same bytes. */
  uint32_t rawlen;
  memcpy(&rawlen, buf->data + sizeof(uint32_t), sizeof(rawlen));
  size_t len = ntohl(rawlen); /* network -> host byte order */
  if (len > buf->size / INST_SIZE || offset >= len)
    return LINELOG_RESULT_EILLDATA;
  size_t offsetinbytes = (size_t)offset * INST_SIZE;
  decode(buf->data + offsetinbytes, inst);
  return LINELOG_RESULT_OK;
}
/* write instruction, with error checks.
   encodes `inst` into the slot at instruction index `offset`.
   returns LINELOG_RESULT_EOVERFLOW if `offset` is not below MAX_OFFSET,
   LINELOG_RESULT_EILLDATA if buf is NULL or claims a size without data, and
   LINELOG_RESULT_ENEEDRESIZE (with buf->neededsize set to the byte size
   required) if the slot does not fit in buf->size. */
static inline linelog_result writeinst(linelog_buf *buf,
    const linelog_inst *inst, linelog_loffset offset) {
  if (offset >= MAX_OFFSET)
    return LINELOG_RESULT_EOVERFLOW;
  if (buf == NULL)
    return LINELOG_RESULT_EILLDATA;
  if (buf->data == NULL && buf->size > 0)
    return LINELOG_RESULT_EILLDATA;

  const size_t start = (size_t)offset * INST_SIZE;
  const size_t end = start + INST_SIZE;
  if (end > buf->size) {
    /* tell the caller how large the buffer has to be */
    buf->neededsize = end;
    return LINELOG_RESULT_ENEEDRESIZE;
  }

  encode(buf->data + start, inst);
  return LINELOG_RESULT_OK;
}
/* helpers to make code shorter.
   both expand to a single `do { ... } while (0)` statement, so that
   `macro(expr);` is safe as the body of an unbraced if/else.
   returnonerror: evaluate expr once; if it is not LINELOG_RESULT_OK, return
   it from the enclosing function.
   mustsuccess: evaluate expr once and assert that it is LINELOG_RESULT_OK.
   expr is still evaluated when NDEBUG is defined; the (void) cast keeps the
   then-unused variable from triggering a warning. */
#define returnonerror(expr) do { \
    linelog_result result = (expr); \
    if (result != LINELOG_RESULT_OK) \
      return result; \
  } while (0)
#define mustsuccess(expr) do { \
    linelog_result result = (expr); \
    assert(result == LINELOG_RESULT_OK); \
    (void)result; \
  } while (0)
/* ensure `ar->lines[0:linecount]` are valid.
   grows ar->lines with realloc when the current capacity
   (ar->maxlinecount) is below `linecount`; never shrinks.
   returns LINELOG_RESULT_EOVERFLOW if `linecount` is not below MAX_LINENUM,
   LINELOG_RESULT_ENOMEM if realloc fails (ar is left unchanged). */
static linelog_result reservelines(linelog_annotateresult *ar,
    linelog_llinenum linecount) {
  if (linecount >= MAX_LINENUM)
    return LINELOG_RESULT_EOVERFLOW;
  if (linecount <= ar->maxlinecount)
    return LINELOG_RESULT_OK; /* already large enough */

  void *grown = realloc(ar->lines, sizeof(linelog_lineinfo) * linecount);
  if (grown == NULL)
    return LINELOG_RESULT_ENOMEM;

  ar->lines = (linelog_lineinfo *)grown;
  ar->maxlinecount = (linelog_linenum)linecount;
  return LINELOG_RESULT_OK;
}
/* APIs declared in .h */
/* release the line array owned by `ar` and reset every field of `ar` to
   zero, so it can be reused */
void linelog_annotateresult_clear(linelog_annotateresult *ar) {
  free(ar->lines);
  memset(ar, 0, sizeof(*ar));
}
/* reset `buf` to an empty linelog: slot 0 records maxrev 0 and an
   instruction count of 2, slot 1 is "JGE 0 0", i.e. the END marker.
   slot 1 is written first, so a too-small buffer reports the size needed
   for both slots in buf->neededsize. errors from writeinst are returned
   unchanged. */
linelog_result linelog_clear(linelog_buf *buf) {
  const linelog_inst header = { .offset = 2 };
  const linelog_inst endmarker = { .offset = 0 };

  linelog_result status = writeinst(buf, &endmarker, 1);
  if (status != LINELOG_RESULT_OK)
    return status;
  return writeinst(buf, &header, 0);
}
/* number of bytes of `buf` actually in use: the instruction count stored in
   slot 0 times INST_SIZE. returns 0 if slot 0 cannot be read. */
size_t linelog_getactualsize(const linelog_buf *buf) {
  linelog_inst header;
  if (readinst(buf, &header, 0) != LINELOG_RESULT_OK)
    return 0;
  return (size_t)(header.offset) * INST_SIZE;
}
/* the maxrev value stored in slot 0 of `buf`. returns 0 if slot 0 cannot be
   read. */
linelog_revnum linelog_getmaxrev(const linelog_buf *buf) {
  linelog_inst header;
  if (readinst(buf, &header, 0) != LINELOG_RESULT_OK)
    return 0;
  return header.rev;
}
/* run the linelog program in `buf` for revision `rev` and store the
   resulting lines in `ar`.
   execution starts at instruction 1 and follows JGE/JL jumps; every LINE
   instruction executed appends one entry to ar->lines. execution stops when
   a jump to offset 0 (the END marker) is taken, or when the step budget
   (the instruction count from slot 0) is used up.
   on success ar->linecount is the number of lines and
   ar->lines[ar->linecount].offset is the offset of the END instruction.
   returns LINELOG_RESULT_EILLDATA for an unknown opcode or when no END
   marker was reached; errors from readinst/reservelines are passed on. */
linelog_result linelog_annotate(const linelog_buf *buf,
    linelog_annotateresult *ar, linelog_revnum rev) {
  linelog_inst header;
  returnonerror(readinst(buf, &header, 0));

  linelog_offset cursor = 0;
  linelog_offset upcoming = 1;
  linelog_offset endat = 0;
  linelog_linenum emitted = 0;
  /* bounds the number of executed instructions, so a malformed program
     cannot loop forever */
  size_t budget = (size_t)header.offset;

  for (;;) {
    cursor = upcoming++;
    if (cursor == 0) /* jumped to the END target */
      break;
    if (--budget == 0) /* out of steps */
      break;

    linelog_inst cur;
    returnonerror(readinst(buf, &cur, cursor));

    if (cur.opcode == JGE || cur.opcode == JL) {
      /* conditional jump */
      bool taken = (cur.opcode == JGE) ? (rev >= cur.rev) : (rev < cur.rev);
      if (taken) {
        upcoming = cur.offset;
        if (upcoming == 0) /* met the END marker */
          endat = cursor;
      }
    } else if (cur.opcode == LINE) {
      /* append a line */
      linelog_lineinfo info = {cur.rev, cur.linenum, cursor};
      returnonerror(reservelines(ar, emitted + 1));
      ar->lines[emitted] = info;
      emitted++;
    } else {
      /* unknown opcode */
      return LINELOG_RESULT_EILLDATA;
    }
  }

  if (endat == 0) /* didn't meet a valid END marker */
    return LINELOG_RESULT_EILLDATA;

  /* ar->lines[ar->linecount].offset records the end offset */
  returnonerror(reservelines(ar, emitted + 1));
  linelog_lineinfo sentinel = { .offset = endat };
  ar->lines[emitted] = sentinel;
  ar->linecount = emitted;
  return LINELOG_RESULT_OK;
}
/* replace lines [a1, a2) of the annotated revision with new lines
   [b1, b2), introduced by revision `brev`, updating both `buf` and `ar`.

   buf:       the linelog to modify.
   ar:        annotate result describing the revision being edited. it must
              hold the end-marker slot ar->lines[ar->linecount], as produced
              by linelog_annotate; otherwise LINELOG_RESULT_EILLDATA is
              returned.
   brev:      revision used in the inserted JL/JGE instructions; also the
              revision of every inserted line when `brevs` is NULL.
   brevs, blinenums: optional arrays indexed by i in [b1, b2) giving the
              revision / line number of each inserted line. when NULL, brev
              and i are used.

   returns LINELOG_RESULT_EOVERFLOW when brev, a2, b2 or the new instruction
   count exceed the hard limits, LINELOG_RESULT_EILLDATA for inconsistent
   arguments, LINELOG_RESULT_ENEEDRESIZE (with buf->neededsize set) when buf
   is too small, and passes on errors from readinst/reservelines. nothing is
   written to buf before all of these checks have passed. */
static linelog_result replacelines(linelog_buf *buf, linelog_annotateresult *ar,
    linelog_revnum brev, linelog_linenum a1, linelog_linenum a2,
    linelog_linenum b1, linelog_linenum b2,
    const linelog_revnum *brevs, const linelog_linenum *blinenums) {
  /*   buf         before        after
                   --------      --------
                   ....          ....
       a1addr    > (a1inst)      JGE 0 oldlen      [5]
       a1addr+1  > ...           ...
                   ....          ....
       a2addr    > ...           ...
                   ....          ....
       oldlen    > (end)         JL brev pjge      [1]
                                 LINE brev b1      [1]
                                 LINE brev b1+1    [1]
                                 ....              [1]
                                 LINE brev b2-1    [1]
       pjge      >               JGE brev a2addr   [2]
       a1newaddr >               (a1inst)          [3]
                                 JGE 0 a1addr+1    [4]
       newlen    >               (end)
     [1]: insert new lines. only exist if b1 < b2
     [2]: delete old lines. only exist if a1 < a2
     [3]: move a1inst to new place, as it will be rewritten in [5]
     [4]: jump back. only exist if a1inst is not an unconditional jump
     [5]: rewrite the old instruction to jump to the new block */

  /* sanity check */
  linelog_inst inst0;
  returnonerror(readinst(buf, &inst0, 0));
  if (brev >= MAX_REVNUM || a2 >= MAX_LINENUM || b2 >= MAX_LINENUM)
    return LINELOG_RESULT_EOVERFLOW;
  if (a2 < a1 || b2 < b1 || !ar || a2 > ar->linecount)
    return LINELOG_RESULT_EILLDATA;
  /* ar->lines[a1] and ar->lines[a2] are read below with a1 <= a2 <=
     linecount, so the array must exist and include the end-marker slot at
     index linecount. reject an `ar` that does not (e.g. one that was only
     zero-initialized) instead of reading through NULL or out of bounds. */
  if (ar->lines == NULL || ar->maxlinecount <= ar->linecount)
    return LINELOG_RESULT_EILLDATA;

  /* useful variables for both step I and III */
  linelog_offset oldlen = inst0.offset;
  linelog_offset a1addr = ar->lines[a1].offset;
  linelog_inst a1inst;
  returnonerror(readinst(buf, &a1inst, a1addr));
  bool a1instisjge0 = (a1inst.opcode == JGE && a1inst.rev == 0);

  /* step I: reserve size for buf: (newlen - oldlen) more instructions */
  linelog_loffset newlen = (linelog_loffset)oldlen
    + (b2 - b1 /* LINE */ + (b2 > b1) /* JL brev */) /* [1] */
    + (a2 > a1) /* JGE brev */ /* [2] */
    + 1 /* a1inst */ /* [3] */
    + (a1instisjge0 ? 0 : 1) /* JGE 0 */ /* [4] */;
  if (newlen >= MAX_OFFSET)
    return LINELOG_RESULT_EOVERFLOW;
  size_t neededsize = (size_t)newlen * INST_SIZE;
  if (neededsize > buf->size) {
    buf->neededsize = neededsize;
    return LINELOG_RESULT_ENEEDRESIZE;
  }

  /* step II: reserve space for annotateresult */
  linelog_llinenum newlinecount =
    (linelog_llinenum)ar->linecount + b2 - b1 - (a2 - a1);
  returnonerror(reservelines(ar, newlinecount + 1));
  assert(ar->linecount < ar->maxlinecount);

  /* writeinst should not fail for remaining steps - we have reserved
     enough space. any failure will be a huge headache for the caller. */

  /* step III: update linelog_buf */
  /* appendinst writes at the current end and advances inst0.offset, which
     is written back as the new instruction count in step IV */
#define appendinst(inst) \
  mustsuccess(writeinst(buf, &inst, inst0.offset++));
  if (b1 < b2) { /* [1] */
    linelog_offset pjge = oldlen + (b2 - b1 + 1);
    linelog_inst jl = { .opcode = JL, .rev = brev, .offset = pjge };
    appendinst(jl);
    for (linelog_linenum i = b1; i < b2; ++i) {
      linelog_inst lineinst = { .opcode = LINE,
        .rev = brevs ? brevs[i] : brev,
        .linenum = blinenums ? blinenums[i] : i };
      appendinst(lineinst);
    }
  }
  if (a1 < a2) { /* [2] */
    linelog_inst jge = { .opcode = JGE, .rev = brev,
      .offset = ar->lines[a2].offset };
    appendinst(jge);
  }
  linelog_offset a1newaddr = inst0.offset;
  appendinst(a1inst); /* [3] */
  if (!a1instisjge0) { /* [4] */
    linelog_inst ret = { .opcode = JGE, .offset = a1addr + 1 };
    appendinst(ret);
  }
#undef appendinst
  linelog_inst jge0 = { .opcode = JGE, .rev = 0, .offset = oldlen };
  mustsuccess(writeinst(buf, &jge0, a1addr)); /* [5] */

  /* step IV: write back updated inst0 */
  if (brev > inst0.rev)
    inst0.rev = brev;
  mustsuccess(writeinst(buf, &inst0, 0));

  /* step V: update annotateresult */
  ar->lines[a1].offset = a1newaddr; /* a1inst got moved */
  if (a2 - a1 != b2 - b1) {
    /* shift the tail, including the end-marker slot at index linecount */
    size_t movesize = sizeof(linelog_lineinfo) *
      (ar->linecount + 1 - a2);
    /* the memmove is safe as step II reserved the memory */
    memmove(ar->lines + a1 + b2 - b1, ar->lines + a2, movesize);
    ar->linecount = (linelog_linenum)newlinecount;
  }
  for (linelog_linenum i = b1; i < b2; ++i) {
    linelog_lineinfo *li = ar->lines + a1 + i - b1;
    li->rev = brevs ? brevs[i] : brev;
    li->linenum = blinenums ? blinenums[i] : i;
    /* the LINE instructions follow the JL written at oldlen */
    li->offset = oldlen + i - b1 + 1;
  }
  return LINELOG_RESULT_OK;
}
/* replace lines [a1, a2) of the annotated revision in `ar` with lines
   [b1, b2) introduced by `brev`. every inserted line i is recorded with
   revision `brev` and line number i. see replacelines for the result
   codes. */
linelog_result linelog_replacelines(linelog_buf *buf,
    linelog_annotateresult *ar, linelog_revnum brev,
    linelog_linenum a1, linelog_linenum a2,
    linelog_linenum b1, linelog_linenum b2) {
  /* no per-line overrides: replacelines falls back to brev and i */
  const linelog_revnum *norevs = NULL;
  const linelog_linenum *nolinenums = NULL;
  return replacelines(buf, ar, brev, a1, a2, b1, b2, norevs, nolinenums);
}
/* like linelog_replacelines, but inserts `blinecount` lines (indexes
   [0, blinecount)) whose revisions and line numbers are taken from
   `brevs` and `blinenums`. either array may be NULL, in which case
   replacelines uses `brev` / the index instead. */
linelog_result linelog_replacelines_vec(linelog_buf *buf,
    linelog_annotateresult *ar, linelog_revnum brev,
    linelog_linenum a1, linelog_linenum a2,
    linelog_linenum blinecount, const linelog_revnum *brevs,
    const linelog_linenum *blinenums) {
  const linelog_linenum bstart = 0;
  linelog_result outcome = replacelines(buf, ar, brev, a1, a2,
      bstart, blinecount, brevs, blinenums);
  return outcome;
}