#!/usr/bin/env python
#
# docchecker - look for problematic markup
#
# Copyright 2016 timeless and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function

import re
import sys

# Splits a line into its leading whitespace (group 1) and the rest of the
# line starting at the first non-blank character (group 2). Lines that
# contain only whitespace do not match at all.
leadingline = re.compile(r'(^\s*)(\S.*)$')

# (regular expression, warning message) pairs. Each expression is searched
# for in every unwrapped line; a hit reports the paired message.
checks = [
    (r""":hg:`[^`]*'[^`]*`""",
     """warning: please avoid nesting ' in :hg:`...`"""),
    (r'\w:hg:`',
     'warning: please have a space before :hg:'),
    (r"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""",
     '''warning: please use " instead of ' for hg ... "..."'''),
]

# The exception list below is needed because of how we try to figure out the
# hg 'arg' pattern. If there exists a message like
# "word hg word d'aide word word d'aide", this checker will break. The proper
# fix would be to identify command line examples better, but it is hard to do.
# ("d'aide" is present a lot in the generated docs since it's a part of
# "pas d'aide disponible", French for "no help available")
exception_words = ["d'aide"]


def check(line):
    """Print ``line`` and every warning from ``checks`` that it triggers.

    A check is ignored when the text it matched contains one of the
    ``exception_words``. Nothing is printed if no warning applies.
    """
    messages = []
    for pattern, msg in checks:
        m = re.search(pattern, line)
        if m is None:
            continue
        matched = m.group()
        # Known false positives: skip the warning if the matched text
        # contains any exception word.
        if any(ew in matched for ew in exception_words):
            continue
        messages.append(msg)
    if messages:
        print(line)
        for msg in messages:
            print(msg)


def work(file):
    """Unwrap the lines of ``file`` and run ``check`` on each unwrapped line.

    ``file`` is any iterable of text lines. Consecutive lines that share
    exactly the same leading whitespace are joined with a single space into
    one logical line. A whitespace-only line or a change of leading
    whitespace ends the current logical line, which is then checked.
    """
    (llead, lline) = ('', '')
    for raw in file:
        # this section unwraps lines
        match = leadingline.match(raw)
        if not match:
            # Blank line: flush what has been accumulated and start over.
            check(lline)
            (llead, lline) = ('', '')
            continue
        lead, text = match.group(1), match.group(2)
        if lead == llead:
            # Same indentation: continuation of the current logical line.
            if lline != '':
                lline += ' ' + text
            else:
                lline = text
        else:
            # Indentation changed: flush and begin a new logical line.
            check(lline)
            (llead, lline) = (lead, text)
    # Flush whatever is left once the input is exhausted.
    check(lline)


def main():
    """Check every file named on the command line (``sys.argv[1:]``).

    A file that cannot be opened or processed is reported on stdout and the
    remaining files are still checked.
    """
    for f in sys.argv[1:]:
        try:
            with open(f) as fp:
                work(fp)
        # Catch Exception rather than BaseException so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit stop the run instead of being
        # reported as a per-file failure.
        except Exception as e:
            print("failed to process %s: %s" % (f, e))


# Only run when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    main()