LaTeX-Workshop/dev/pyintel/pkgcommand.py

import json
import urllib.request
import re
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Union

# Stems of .cwl file names (file name without extension) whose '#keyvals:' blocks
# attached to commands and environments are skipped by CwlIntel.parse_cwl_file.
# Package/class option blocks ('#keyvals:\usepackage/...') are read regardless.
PKGS_IGNORE_KEYVALS = []

@dataclass
class KeyVal:
    """
    A key paired with a snippet string.

    NOTE(review): nothing else in this part of the file references this class;
    confirm whether it is still used by other modules.
    """
    key: str      # presumably the key name -- TODO confirm against callers
    snippet: str  # presumably the snippet text inserted for that key -- TODO confirm

@dataclass
class Cmd:
    """
    Intellisense data for one command, as filled in by CwlIntel.parse_cwl_file
    """
    # Output of create_snippet for the command, or None when it is identical to
    # the key under which the command is stored in Pkg.cmds
    snippet: Union[str, None]
    # Text following '#ifOption:' for the block the command was declared in,
    # None when declared outside any '#ifOption:' ... '#endif' block
    option: Union[str, None]
    # Index into Pkg.keyvals once parsing has finished, None when no keyval is attached.
    # While a file is being parsed this field temporarily holds the list of keyval strings.
    keyvalindex: Union[int, None]
    # Number of empty delimiter pairs ('[]', '()', '<>', '{}') left in the snippet text
    # preceding the ':keys' / ':keyvals' / ':options' / ':library' placeholder once the
    # complete '${...}' placeholders have been removed; None when no keyval is attached
    keyvalpos: Union[int, None]
    # 'detail' entry of CwlIntel.unimath_dict for this command name, None if unknown
    detail: Union[str, None]
    # 'documentation' entry of CwlIntel.unimath_dict for this command name, None if unknown
    documentation: Union[str, None]

@dataclass
class Env:
    """
    Intellisense data for one environment, as filled in by CwlIntel.parse_cwl_file
    """
    # Bare environment name taken from '\begin{...}' when the key used in Pkg.envs
    # differs from it (the key also carries emptied argument delimiters);
    # None when the key already is the bare name
    name: Union[str, None]
    # Output of create_snippet for the text following '\begin{name}',
    # None when that output is empty
    snippet: Union[str, None]
    # Text following '#ifOption:' for the block the environment was declared in,
    # None when declared outside any '#ifOption:' ... '#endif' block
    option: Union[str, None]
    # Index into Pkg.keyvals once parsing has finished, None when no keyval is attached.
    # While a file is being parsed this field temporarily holds the list of keyval strings.
    keyvalindex: Union[int, None]
    # Number of empty delimiter pairs ('[]', '()', '<>', '{}') left in the snippet text
    # preceding the keyval placeholder once the complete '${...}' placeholders have
    # been removed; None when no keyval is attached
    keyvalpos: Union[int, None]

@dataclass
class Pkg:
    """
    Everything CwlIntel.parse_cwl_file extracts from a single .cwl file
    """
    # Maps the target of every '#include:' line to the list of '#ifOption:' values
    # that were active where the include appeared (no entry is added to the list
    # for an include found outside any '#ifOption:' block)
    includes: Dict[str, List[str]]
    # Commands, keyed by command name followed by its emptied argument delimiters
    cmds: Dict[str, Cmd]
    # Environments, keyed by environment name followed by its emptied argument delimiters
    envs: Dict[str, Env]
    # Entries read from '#keyvals:\usepackage/...' and '#keyvals:\documentclass/...' blocks
    options: List[str]
    # Distinct keyval lists; Cmd.keyvalindex and Env.keyvalindex are indices into this list
    keyvals: List[List[str]]

def create_snippet(line: str) -> str:
    """
    Turn every argument of a CWL command line into a numbered snippet placeholder

    Arguments delimited by {}, [], <> and () keep their delimiters and become
    `${n:name}`; `%<name%>` markers become `${n:name}` without delimiters; a
    `..` that is not part of a longer run of dots or next to a space becomes a
    bare `${n}` tab stop. Remaining CWL annotations (`%keyvals`, `%<...%>`,
    `%...` suffixes inside a placeholder) are stripped at the end.

    :param line: The command (without its leading backslash) or the argument
    part of an environment, as written in the .cwl file
    :return: The snippet string
    """
    holder = PlaceHolder()
    first_curly = line.find('{')
    first_square = line.find('[')
    if first_square < first_curly:
        # A [ shows up before the first { (or there is no [ at all):
        # number all the {} arguments first, then the [] ones
        bracket_patterns = (r'(\{)([^\{\$]*)(\})', r'(\[)([^\[\$]*)(\])')
    else:
        # Otherwise {} and [] are numbered together, in order of appearance
        bracket_patterns = (r'(\{|\[)([^\{\[\$]*)(\}|\])',)
    delimited_patterns = bracket_patterns + (
        r'(?<![\{\s:\[])(\<)([a-zA-Z\s]*)(\>)',
        r'(\()([^\{\}\[\]\(\)]*)(\))',
    )

    result = line
    for pattern in delimited_patterns:
        result = re.sub(pattern, holder.sub, result)

    # %<...%> markers are replaced as a whole: the delimiters are dropped
    holder.setKeepDelimiters(False)
    result = re.sub(r'(?<![\{:\[=-])(%\<)([a-zA-Z\s]*)(%\>)(?!})', holder.sub, result)

    # NOTE(review): tab stops are numbered by their own counter starting at 1,
    # independently of the placeholder counter above
    stops = TabStop()
    result = re.sub(r'(?<![\. ])\.\.(?![\. ])', stops.sub, result)

    # Strip what is left of the CWL-specific annotations
    cleanup = (
        (r'%keyvals', ''),
        (r'%<([^%]*?)%:.*?%>', r'\1'),
        (r'%<([^%]*?)%>', r'\1'),
        (r'\$\{(\d+:.*?)%.*?\}', r'${\1}'),
    )
    for pattern, replacement in cleanup:
        result = re.sub(pattern, replacement, result)
    return result


class TabStop:
    """
    Callable replacement for re.sub that emits consecutively numbered tab stops

    :count: The number of tab stops handed out so far.
    """

    def __init__(self):
        self.count = 0

    def sub(self, _matchObject) -> str:
        """
        Return the next tab stop, `${1}`, `${2}`, ... The match itself is ignored.
        """
        self.count = self.count + 1
        return f'${{{self.count}}}'


class PlaceHolder:
    """
    Callable replacement for re.sub that numbers placeholders

    Every pattern used with `sub` must expose three groups: opening delimiter,
    content, closing delimiter.

    :count: The number of placeholders that have already been replaced.
    :usePlaceHolders: When True, keep the placeholder name in the snippet
    :keepDelimiters: When True, keep the delimiters (usually {} or []) surrounding every placeholder
    """

    def __init__(self):
        self.count = 0
        self.usePlaceHolders = True
        self.keepDelimiters = True

    def setUsePlaceHolders(self, trueOrFalse: bool):
        self.usePlaceHolders = trueOrFalse

    def setKeepDelimiters(self, trueOrFalse: bool):
        self.keepDelimiters = trueOrFalse

    def isToSkip(self, delimiters: str, string: str):
        """
        Tell whether a match must be left untouched: only a few fixed
        parenthesised strings are concerned.
        """
        return delimiters == '()' and string in ('s', 'en anglais', 'en français')

    def sub(self, matchObject) -> str:
        """
        Return the replacement text for one match and advance the counter
        (skipped matches are returned verbatim and do not consume a number).
        """
        opening, content, closing = matchObject.group(1, 2, 3)
        if self.isToSkip(opening + closing, content):
            return opening + content + closing

        self.count += 1
        label = ':' + content if self.usePlaceHolders else ''
        placeholder = '${' + str(self.count) + label + '}'
        if not self.keepDelimiters:
            return placeholder
        return opening + placeholder + closing

def apply_caption_tweaks(content: List[str]) -> List[str]:
    """
    Rewrite every `#<digit>` found in the given lines as `arg<digit>`

    :param content: The lines of the .cwl file
    :return: A new list holding the rewritten lines
    """
    hash_digit = re.compile(r'#([0-9])', flags=re.A)
    return [hash_digit.sub(r'arg\1', text) for text in content]


class CwlIntel:
    """
    Parse a CWL file to generate intellisense data in JSON format

    :unimath_dict: Dictionary of unimathsymbols
    """

    def __init__(self, commands_file: Union[Path, str], envs_file: Union[Path, str], unimathsymbols: Union[Path, str]):
        """
        :param commands_file: Path to the JSON file contaning the default commands
        :param envs_file: Path to the JSON file contaning the default environments
        :param unimathsymbols: Path to unimathsymbols.txt. If the file exists, it
        is read from this location. If not, it is retrieved from
        http://milde.users.sourceforge.net/LUCR/Math/data/ and written to this location.
        """
        self.unimath_dict: Dict[str, Dict[str, str]] = {}
        self.unimathsymbols = Path(unimathsymbols)
        try:
            self.commands = json.load(open(commands_file, encoding='utf8'))
        except (OSError, json.JSONDecodeError):
            print(f'Cannot read JSON file {commands_file}')
            self.commands = []
        try:
            self.envs = json.load(open(envs_file, encoding='utf8'))
        except (OSError, json.JSONDecodeError):
            print(f'Cannot read JSON file {envs_file}')
            self.envs = []
        self.compute_unimathsymbols()


    def compute_unimathsymbols(self) -> Dict[str, Dict[str, str]]:
        """
        Create a dictionnary of unmimathsymbols
        """
        if not self.unimathsymbols.exists():
            urllib.request.urlretrieve('http://milde.users.sourceforge.net/LUCR/Math/data/unimathsymbols.txt', self.unimathsymbols)
        with self.unimathsymbols.open(encoding='utf8') as f:
            lines = f.readlines()
        for line in lines:
            cmds: List[str] = []
            if line[0] == '#':
                continue
            line = line.strip()
            arry = line.split('^')
            cmds.append(re.sub(r'^\\', '', arry[2]))
            cmds.append(re.sub(r'^\\', '', arry[3]))
            for m in re.finditer(r'= \\(\w+)[ ,]', arry[-1]):
                cmds.append(m.group(1))
            doc = re.sub(r'\s*[=#xt]\s*\\\w+(\{.*?\})?\s*(\(.*?\))?\s*,', '', arry[-1])
            doc = re.sub(r'\s*[=#xt]\s*\S+\s*,', '', doc)
            doc = doc.strip()
            for c in cmds:
                if c == '' or re.search('{', c):
                    continue
                self.unimath_dict[c] = {'detail': arry[1], 'documentation': doc}


    def parse_cwl_file(self, file_path: Union[Path, str], remove_spaces: bool = False) -> Pkg:
        """
        Parse a CWL file to extract the provided commands and environments

        :param file_path: Path to the .cwl file to parse
        :param remove_spaces: If true, spaces are removed to compute the name of the snippet
        """
        if isinstance(file_path, str):
            file_path = Path(file_path)
        if not file_path.exists():
            print(f'File {file_path.as_posix} does not exist')
            return ({}, {})
        with file_path.open(encoding='utf8') as f:
            lines = f.readlines()
        pkg = Pkg(includes={}, cmds={}, envs={}, options=[], keyvals=[])
        if file_path.name == 'caption.cwl':
            lines = apply_caption_tweaks(lines)

        cwl_keyval = None
        cwl_option = None
        for line in lines:
            line = line.rstrip()
            if len(line) == 0:                      # empty line
                continue
            elif line.startswith('#include:'):      # '#include:keyval'
                if (line[9:] not in pkg.includes):  # 'keyval'
                    pkg.includes[line[9:]] = []
                if (cwl_option is not None):
                    pkg.includes[line[9:]].append(cwl_option)
            elif line.startswith('#ifOption:'):     # '#ifOption:newfloat=true'
                cwl_option = line[10:]              # 'newfloat=true'
            elif line.startswith('#endif'):         # '#endif'
                cwl_option = None
            elif line.startswith('#keyvals:\\usepackage/'): # '#keyvals:\usepackage/color#c'
                cwl_keyval = 'PACKAGE_OPTIONS'
            elif line.startswith('#keyvals:\\documentclass/'): # '#keyvals:\usepackage/color#c'
                cwl_keyval = 'PACKAGE_OPTIONS'
            elif line.startswith('#keyvals:'):      # '#keyvals:\begin{minted},\mint,\inputminted'
                cwl_keyval = line[9:]               # '\begin{minted},\mint,\inputminted'
            elif line.startswith('#endkeyvals'):    # '#endkeyvals'
                cwl_keyval = None
            elif line.startswith('#'):
                continue
            elif line.startswith('\\begin{'):       # '\begin{minted}[options%keyvals]#S'
                match = re.match(r'\\begin{(.*?)}([^#\n]*)#?(.*)$', line)
                if match is None:
                    continue
                if len(match.groups()) >= 2 and match[2]:
                    name = match[1] + re.sub(r'(\{|\[)[^\{\[\$]*(\}|\])', r'\1\2', match[2])
                else:
                    name = match[1]
                name = re.sub(r'\<[a-zA-Z\s]*\>', '<>', name)
                if remove_spaces:
                    name = name.replace(' ', '')
                else:
                    name = name.strip()
                # The name field can only contain letters, `{`, `}`, `[`, `]` and `*`.
                # https://github.com/James-Yu/LaTeX-Workshop/issues/3264#issuecomment-1138733921
                if re.match(r'[^A-Za-z\[\]\{\}\<\>\*\s]', name) is not None or '%' in name:
                    continue
                snippet = create_snippet(match[2] if len(match.groups()) >= 2 and match[2] else '')
                pkg.envs[name] = Env(
                    name=None if name == match[1] else match[1],
                    snippet=None if snippet == '' else snippet,
                    option=cwl_option,
                    keyvalindex=None,
                    keyvalpos=None)
            elif line.startswith('\\end{'):         # '\end{minted}'
                continue
            elif line.startswith('\\'):             # '\inputminted[options%keyvals]{language}{file}#i'
                match = re.match(r'\\([^[\{\n]*?)((?:\{|\[)[^#\n]*)?(#.*)?$', line)
                if match is None:
                    continue
                if len(match.groups()) >= 2 and match[2]:
                    name = match[1] + re.sub(r'(\{|\[)[^\{\[\$]*(\}|\])', r'\1\2', match[2])
                else:
                    name = match[1]
                name = re.sub(r'\([^\{\}\[\]\(\)]*\)', r'()', name)
                name = re.sub(r'\<[a-zA-Z\s]*\>', '<>', name)
                name = re.sub(r'\|.*?\|', '', name) # Remove |%<code%>| from '\mintinline[%<options%>]{%<language%>}|%<code%>|#M'
                if remove_spaces:
                    name = name.replace(' ', '')
                else:
                    name = name.strip()
                # The name field can only contain letters, `{`, `}`, `[`, `]` and `*`.
                # https://github.com/James-Yu/LaTeX-Workshop/issues/3264#issuecomment-1138733921
                if re.match(r'[^A-Za-z\[\]\{\}\<\>\*\s]', name) is not None or '(' in name or ')' in name or '\\' in name or '%' in name:
                    continue
                if name in self.commands:
                    continue
                snippet = create_snippet(match[1] + (match[2] if len(match.groups()) >= 2 and match[2] else ''))
                detail = self.unimath_dict[name]['detail'] if self.unimath_dict.get(name) else None
                documentation = self.unimath_dict[name]['documentation'] if self.unimath_dict.get(name) else None
                pkg.cmds[name] = Cmd(
                    snippet=None if name == snippet else snippet,
                    option=cwl_option,
                    keyvalindex=None,
                    keyvalpos=None,
                    detail=detail,
                    documentation=documentation)
            elif cwl_keyval == 'PACKAGE_OPTIONS':
                for i in range(len(re.findall(r'%<([^%]*?)%>', line))):
                    line = re.sub(r'%<([^%]*?)%>', '${' + str(i + 1) + r':\1}', line, 1)
                match = re.match(r'^([^#%\n]*)', line)
                if match is None:
                    continue
                pkg.options.append(match[1])
            elif cwl_keyval is not None and file_path.stem not in PKGS_IGNORE_KEYVALS:
                for i in range(len(re.findall(r'%<([^%]*?)%>', line))):
                    line = re.sub(r'%<([^%]*?)%>', '${' + str(i + 1) + r':\1}', line, 1)
                match = re.match(r'^([^#\n]*)', line)
                if match is None:
                    continue
                for envcmd in cwl_keyval.split(','):
                    if envcmd.startswith('\\begin{'):
                        env = re.match(r'\\begin{(.*?)}', envcmd)[1]
                        for pkgenv in pkg.envs:
                            if (pkg.envs[pkgenv].name != env):
                                continue
                            haskeyvals = re.search(r':keys|:keyvals|:options|:library', pkg.envs[pkgenv].snippet)
                            if (haskeyvals is None):
                                continue
                            if (pkg.envs[pkgenv].keyvalpos is None):
                                pkg.envs[pkgenv].keyvalpos = len(re.findall(r'\[\]|\(\)|<>|{}', re.sub(r'\${.*?}', '', pkg.envs[pkgenv].snippet[:haskeyvals.start()])))
                            pkg.envs[pkgenv].keyvalindex = pkg.envs[pkgenv].keyvalindex or []
                            pkg.envs[pkgenv].keyvalindex.append(match[1])
                    else:
                        cmd = re.match(r'\\?([^{\[#]*)', envcmd)[1]
                        for pkgcmd in pkg.cmds:
                            if (re.sub(r'\[\]|\(\)|<>|{}', '', pkgcmd) != cmd):
                                continue
                            haskeyvals = re.search(r':keys|:keyvals|:options|:library', pkg.cmds[pkgcmd].snippet or pkgcmd)
                            if (haskeyvals is None):
                                continue
                            if (pkg.cmds[pkgcmd].keyvalpos is None):
                                pkg.cmds[pkgcmd].keyvalpos = len(re.findall(r'\[\]|\(\)|<>|{}', re.sub(r'\${.*?}', '', pkg.cmds[pkgcmd].snippet[:haskeyvals.start()])))
                            pkg.cmds[pkgcmd].keyvalindex = pkg.cmds[pkgcmd].keyvalindex or []
                            pkg.cmds[pkgcmd].keyvalindex.append(match[1])

        for pkgcmd in pkg.cmds:
            if pkg.cmds[pkgcmd].keyvalindex is None:
                continue
            keyvalset = set(pkg.cmds[pkgcmd].keyvalindex)
            found = False
            for idx, cand in enumerate(pkg.keyvals):
                candset = set(cand)
                if (keyvalset == candset):
                    found = True
                    pkg.cmds[pkgcmd].keyvalindex = idx
                    break
            if not found:
                pkg.keyvals.append(pkg.cmds[pkgcmd].keyvalindex)
                pkg.cmds[pkgcmd].keyvalindex = len(pkg.keyvals) - 1

        for pkgenv in pkg.envs:
            if pkg.envs[pkgenv].keyvalindex is None:
                continue
            keyvalset = set(pkg.envs[pkgenv].keyvalindex)
            found = False
            for idx, cand in enumerate(pkg.keyvals):
                candset = set(cand)
                if (keyvalset == candset):
                    found = True
                    pkg.envs[pkgenv].keyvalindex = idx
                    break
            if not found:
                pkg.keyvals.append(pkg.envs[pkgenv].keyvalindex)
                pkg.envs[pkgenv].keyvalindex = len(pkg.keyvals) - 1

        return pkg