# Portions Copyright (c) Facebook, Inc. and its affiliates. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2. # dagparser.py - parser and generator for concise description of DAGs # # Copyright 2010 Peter Arrenbrecht # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import re import string from bindings import vlq from . import error, pycompat, util from .i18n import _ from .pycompat import range def parsedag(desc): '''parses a DAG from a concise textual description; generates events "+n" is a linear run of n nodes based on the current default parent "." is a single node based on the current default parent "$" resets the default parent to -1 (implied at the start); otherwise the default parent is always the last node created ">> len(list(parsedag(b""" ... ... +3 # 3 nodes in linear run ... :forkhere # a label for the last of the 3 nodes from above ... +5 # 5 more nodes on one branch ... :mergethis # label again ... >> list(parsedag(b"")) [] A simple linear run: >>> list(parsedag(b"+3")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))] Some non-standard ways to define such runs: >>> list(parsedag(b"+1+2")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))] >>> list(parsedag(b"+1*1*")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))] >>> list(parsedag(b"*")) [('n', (0, [-1]))] >>> list(parsedag(b"...")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))] A fork and a join, using numeric back references: >>> list(parsedag(b"+2*2*/2")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [0])), ('n', (3, [2, 1]))] >>> list(parsedag(b"+2<2+1/2")) [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [0])), ('n', (3, [2, 1]))] Placing a label: >>> list(parsedag(b"+1 :mylabel +1")) [('n', (0, [-1])), ('l', (0, 'mylabel')), ('n', (1, [0]))] An empty label (silly, really): >>> list(parsedag(b"+1:+1")) [('n', (0, [-1])), ('l', (0, '')), ('n', (1, [0]))] Fork and join, but with labels instead of numeric back references: >>> list(parsedag(b"+1:f +1:p2 *f */p2")) [('n', (0, [-1])), ('l', (0, 'f')), ('n', (1, [0])), ('l', (1, 'p2')), ('n', (2, [0])), ('n', (3, [2, 1]))] >>> list(parsedag(b"+1:f +1:p2 >> list(parsedag(b"+1 $ +1")) [('n', (0, [-1])), ('n', (1, [-1]))] Annotations, which are meant to introduce sticky state for subsequent nodes: >>> list(parsedag(b"+1 @ann +1")) [('n', (0, [-1])), ('a', 'ann'), ('n', (1, [0]))] >>> list(parsedag(b'+1 @"my annotation" +1')) [('n', (0, [-1])), ('a', 'my annotation'), ('n', (1, [0]))] Commands, which are meant to operate on the most recently created node: >>> list(parsedag(b"+1 !cmd +1")) [('n', (0, [-1])), ('c', 'cmd'), ('n', (1, [0]))] >>> list(parsedag(b'+1 !"my command" +1')) [('n', (0, [-1])), ('c', 'my command'), ('n', (1, [0]))] >>> list(parsedag(b'+1 !!my command line\\n +1')) [('n', (0, [-1])), ('C', 'my command line'), ('n', (1, [0]))] Comments, which extend to the end of the line: >>> list(parsedag(b'+1 # comment\\n+1')) [('n', (0, [-1])), ('n', (1, [0]))] Error: >>> try: list(parsedag(b'+1 bad')) ... except Exception as e: print(bytes(e)) invalid character in dag description: bad... ''' if not desc: return wordchars = pycompat.bytestr(string.ascii_letters + string.digits) labels = {} p1 = -1 r = 0 def resolve(ref): if not ref: return p1 elif ref[0] in pycompat.bytestr(string.digits): return r - int(ref) else: return labels[ref] chiter = iter(desc) def nextch(): return next(chiter, "\0") def nextrun(c, allow): s = "" while c in allow: s += c c = nextch() return c, s def nextdelimited(c, limit, escape): s = "" while c != limit: if c == escape: c = nextch() s += c c = nextch() return nextch(), s def nextstring(c): if c == '"': return nextdelimited(nextch(), '"', "\\") else: return nextrun(c, wordchars) c = nextch() while c != "\0": while c in pycompat.bytestr(string.whitespace): c = nextch() if c == ".": yield "n", (r, [p1]) p1 = r r += 1 c = nextch() elif c == "+": c, digs = nextrun(nextch(), pycompat.bytestr(string.digits)) n = int(digs) for i in range(0, n): yield "n", (r, [p1]) p1 = r r += 1 elif c in "*/": if c == "*": c = nextch() c, pref = nextstring(c) prefs = [pref] while c == "/": c, pref = nextstring(nextch()) prefs.append(pref) ps = [resolve(ref) for ref in prefs] yield "n", (r, ps) p1 = r r += 1 elif c == "<": c, ref = nextstring(nextch()) p1 = resolve(ref) elif c == ":": c, name = nextstring(nextch()) labels[name] = p1 yield "l", (p1, name) elif c == "@": c, text = nextstring(nextch()) yield "a", text elif c == "!": c = nextch() if c == "!": cmd = "" c = nextch() while c not in "\n\r\0": cmd += c c = nextch() yield "C", cmd else: c, cmd = nextstring(c) yield "c", cmd elif c == "#": while c not in "\n\r\0": c = nextch() elif c == "$": p1 = -1 c = nextch() elif c == "\0": return # in case it was preceded by whitespace else: s = "" i = 0 while c != "\0" and i < 10: s += c i += 1 c = nextch() raise error.Abort(_("invalid character in dag description: " "%s...") % s) def dagtextlines( events, addspaces=True, wraplabels=False, wrapannotations=False, wrapcommands=False, wrapnonlinear=False, usedots=False, maxlinewidth=70, ): """generates single lines for dagtext()""" def wrapstring(text): if re.match("^[0-9a-z]*$", text): return text return '"' + text.replace("\\", "\\\\").replace('"', '"') + '"' def gen(): labels = {} run = 0 wantr = 0 needroot = False for kind, data in events: if kind == "n": r, ps = data # sanity check if r != wantr: raise error.Abort(_("expected id %i, got %i") % (wantr, r)) if not ps: ps = [-1] else: for p in ps: if p >= r: raise error.Abort( _("parent id %i is larger than " "current id %i") % (p, r) ) wantr += 1 # new root? p1 = r - 1 if len(ps) == 1 and ps[0] == -1: if needroot: if run: yield "+%d" % run run = 0 if wrapnonlinear: yield "\n" yield "$" p1 = -1 else: needroot = True if len(ps) == 1 and ps[0] == p1: if usedots: yield "." else: run += 1 else: if run: yield "+%d" % run run = 0 if wrapnonlinear: yield "\n" prefs = [] for p in ps: if p == p1: prefs.append("") elif p in labels: prefs.append(labels[p]) else: prefs.append("%d" % (r - p)) yield "*" + "/".join(prefs) else: if run: yield "+%d" % run run = 0 if kind == "l": rid, name = data labels[rid] = name yield ":" + name if wraplabels: yield "\n" elif kind == "c": yield "!" + wrapstring(data) if wrapcommands: yield "\n" elif kind == "C": yield "!!" + data yield "\n" elif kind == "a": if wrapannotations: yield "\n" yield "@" + wrapstring(data) elif kind == "#": yield "#" + data yield "\n" else: raise error.Abort( _("invalid event type in dag: " "('%s', '%s')") % (util.escapestr(kind), util.escapestr(data)) ) if run: yield "+%d" % run line = "" for part in gen(): if part == "\n": if line: yield line line = "" else: if len(line) + len(part) >= maxlinewidth: yield line line = "" elif addspaces and line and part != ".": line += " " line += part if line: yield line def dagtext( dag, addspaces=True, wraplabels=False, wrapannotations=False, wrapcommands=False, wrapnonlinear=False, usedots=False, maxlinewidth=70, ): """generates lines of a textual representation for a dag event stream events should generate what parsedag() does, so: ('n', (id, [parentids])) for node creation ('l', (id, labelname)) for labels on nodes ('a', text) for annotations ('c', text) for commands ('C', text) for line commands ('!!') ('#', text) for comment lines Parent nodes must come before child nodes. Examples -------- Linear run: >>> dagtext([(b'n', (0, [-1])), (b'n', (1, [0]))]) '+2' Two roots: >>> dagtext([(b'n', (0, [-1])), (b'n', (1, [-1]))]) '+1 $ +1' Fork and join: >>> dagtext([(b'n', (0, [-1])), (b'n', (1, [0])), (b'n', (2, [0])), ... (b'n', (3, [2, 1]))]) '+2 *2 */2' Fork and join with labels: >>> dagtext([(b'n', (0, [-1])), (b'l', (0, b'f')), (b'n', (1, [0])), ... (b'l', (1, b'p2')), (b'n', (2, [0])), (b'n', (3, [2, 1]))]) '+1 :f +1 :p2 *f */p2' Annotations: >>> dagtext([(b'n', (0, [-1])), (b'a', b'ann'), (b'n', (1, [0]))]) '+1 @ann +1' >>> dagtext([(b'n', (0, [-1])), ... (b'a', b'my annotation'), ... (b'n', (1, [0]))]) '+1 @"my annotation" +1' Commands: >>> dagtext([(b'n', (0, [-1])), (b'c', b'cmd'), (b'n', (1, [0]))]) '+1 !cmd +1' >>> dagtext([(b'n', (0, [-1])), ... (b'c', b'my command'), ... (b'n', (1, [0]))]) '+1 !"my command" +1' >>> dagtext([(b'n', (0, [-1])), ... (b'C', b'my command line'), ... (b'n', (1, [0]))]) '+1 !!my command line\\n+1' Comments: >>> dagtext([(b'n', (0, [-1])), (b'#', b' comment'), (b'n', (1, [0]))]) '+1 # comment\\n+1' >>> dagtext([]) '' Combining parsedag and dagtext: >>> dagtext(parsedag(b'+1 :f +1 :p2 *f */p2')) '+1 :f +1 :p2 *f */p2' """ return "\n".join( dagtextlines( dag, addspaces, wraplabels, wrapannotations, wrapcommands, wrapnonlinear, usedots, maxlinewidth, ) ) def bindag(revs, parentrevs): """Generate binary representation for a dag revs is a list of commit identities. It must be topo-sorted from the oldest to the newest commits. parentrevs is a function that takes a commit identity, and returns a list of parent commit identities: (rev) -> [rev]. The binary format consists of a stream of VLQ-encoded integers. Every commit has an ID. The first commit created has ID K, the second has ID K+1, and so on. K does not matter, because the format uses relative reference to previous commits. To parse the binary data, read integers one by one, and handle them using the following rules: - 0: New root commit. Create a new commit that has no parents. - 1: New single-parent commit. Read the next integer as P. Create a new commit with a single parent with ID = - P. - 2: New merge commit. Read the next two integers as P, Q. Create a new commit with two parents - P, and - Q. - 3: New merge commit (fast path 1). Read the next integer as Q. Create a new commit with two parents: , and - Q. - 4: New merge commit (fast path 2). Read the next integer as P. Create a new commit with two parents: - P, and . - N: New linear stack of commits (N > 4). Create a stack of N - 4 commits on top of the last commit created. """ idmap = {} # {rev: commit id} buf = util.stringio() def push(value, encode=vlq.encode, write=buf.write): """Append an integer to the buffer""" write(encode(value)) pendingcommits = [0] def pushpending(push=push): if pendingcommits[0] > 0: push(pendingcommits[0] + 4) pendingcommits[0] = 0 for rev in revs: nextid = len(idmap) idmap[rev] = nextid p1, p2 = parentrevs(rev) if p1 == -1: assert p2 == -1 pushpending() push(0) pendingcommits[0] = 0 elif idmap[p1] + 1 == nextid and p2 == -1: pendingcommits[0] += 1 else: pushpending() lastid = nextid - 1 dp1 = lastid - idmap[p1] if p2 == -1: push(1) push(dp1) else: dp2 = lastid - idmap[p2] if dp1 == 0: push(3) push(dp2) elif dp2 == 0: push(4) push(dp1) else: push(2) push(dp1) push(dp2) pushpending() return buf.getvalue() def parsebindag(data): """Reverse of `bindag`. Translated binary DAG to revs and parentrevs. The returned revs use integer commit identities starting from 0. """ def readiter(data, decodeat=vlq.decodeat): offset = 0 while offset < len(data): value, size = decodeat(data, offset) yield value offset += size it = readiter(data) parents = [] # index: id, value: parentids append = parents.append # build dag in-memory while True: i = next(it, None) lastid = len(parents) - 1 if i is None: break elif i == 0: append(()) elif i == 1: p1 = lastid - next(it) append((p1,)) elif i == 2: p1 = lastid - next(it) p2 = lastid - next(it) append((p1, p2)) elif i == 3: p1 = lastid p2 = lastid - next(it) append((p1, p2)) elif i == 4: p1 = lastid - next(it) p2 = lastid append((p1, p2)) else: n = i - 4 while n > 0: p1 = len(parents) - 1 parents.append((p1,)) n -= 1 revs = range(len(parents)) parentrevs = parents.__getitem__ return revs, parentrevs