hurl/bin/docs/build_man_md.py

#!/usr/bin/env python3
"""Build Grammar Markdown Manual File.

This script converts the Hurl manual file to Markdown suitable for the Hurl canonical docs.

This tool takes the Hurl manual file as its first argument.

Examples:
    $ python3 bin/docs/build_man_md.py docs/manual/hurl.md > docs/manual.md

"""
import re
import sys
from pathlib import Path
from typing import List

from markdown import (
    parse_markdown,
    MarkdownDoc,
    Header,
    Paragraph,
    Whitespace,
    Node,
)


def normalize_h2(doc: MarkdownDoc) -> None:
    """Title-case the level-2 headers found among ``doc.children``, in place.

    A header whose title is exactly ``"WWW"`` is skipped so the acronym is not
    rewritten by ``str.title()``. Every modified header gets its
    ``update_content()`` method called after the title is changed.
    """
    level_two_headers = [
        node
        for node in doc.children
        if isinstance(node, Header) and node.level == 2
    ]
    for header in level_two_headers:
        # The WWW acronym must keep its capitalisation.
        if header.title != "WWW":
            header.title = header.title.title()
            header.update_content()


def process_table(doc: MarkdownDoc, nodes: List[Node], col_name: str) -> None:
    """Turn a run of header-introduced items into a two-column Markdown table.

    Each ``Header`` in ``nodes`` produces one table row. The first column
    (titled ``col_name``) holds the header title, the second one
    (``Description``) holds the content of the paragraphs located between that
    header and the next header of ``doc``, with newlines replaced by ``<br>``.

    The rows are passed to ``doc.insert_nodes`` with the node preceding
    ``nodes[0]`` as ``start``, then ``nodes`` are removed from ``doc``.

    This can be used to transform options, variables and environment sections.
    """
    # Matches titles of the form "name {#anchor}".
    anchored_title = re.compile(r"""(.+) \{#(.+)}""")

    def first_column(title):
        # Titles carrying an explicit anchor become a self-referencing link
        # (with angle brackets HTML-escaped); others are plain inline code.
        found = anchored_title.match(title)
        if not found:
            return f"`{title}`"
        anchor = found.group(2)
        label = found.group(1).replace("<", "&lt;").replace(">", "&gt;")
        return f'<a href="#{anchor}" id="{anchor}"><code>{label}</code></a>'

    table = [
        Whitespace(content="\n"),
        Paragraph(content=f"| {col_name} | Description |\n| --- | --- |\n"),
    ]

    item_headers = [node for node in nodes if isinstance(node, Header)]
    for header in item_headers:
        cell_name = first_column(header.title)

        after_header = doc.next_node(header)
        following_header = doc.find_first(
            lambda node: isinstance(node, Header), start=after_header
        )
        opening = doc.find_first(
            lambda node: isinstance(node, Paragraph), start=after_header
        )
        assert opening is not None

        # Walk backwards from the following header until a paragraph is met:
        # this is the last paragraph of the current item.
        closing = following_header
        while closing:
            if isinstance(closing, Paragraph):
                break
            closing = doc.previous_node(closing)
        assert closing is not None

        # The slice is bounded by the node right after the last paragraph.
        boundary = doc.next_node(closing)
        assert boundary is not None

        cell_description = "".join(
            part.content for part in doc.slice(opening, boundary) if part.content
        )
        cell_description = cell_description.replace("\n", "<br>")

        table.append(Paragraph(content=f"| {cell_name} | {cell_description} |\n"))

    # Insert the table, then drop the original items.
    insertion_point = doc.previous_node(nodes[0])
    assert insertion_point is not None
    doc.insert_nodes(start=insertion_point, nodes=table)
    doc.remove_nodes(nodes)


def main() -> None:
    """Convert the Hurl manual named on the command line and print the result.

    The Markdown file given as first command line argument is parsed, its
    level-2 headers are normalized with ``normalize_h2`` and the level-3 items
    of the "Options", "Environment" and "Exit Codes" sections are turned into
    tables with ``process_table``. The resulting document is printed to
    standard output, prefixed by a ``# Manual`` title.

    Raises:
        SystemExit: with a usage message when no input file is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() with a string prints it to stderr and exits with status 1,
        # which keeps the message out of a redirected standard output.
        sys.exit("usage: build_man_md.py FILE")

    input_file = sys.argv[1]
    # NOTE(review): read_text() without an explicit encoding decodes with the
    # locale's preferred encoding; confirm whether UTF-8 should be forced for
    # both the input file and standard output.
    src = Path(input_file).read_text()

    man = parse_markdown(text=src)

    normalize_h2(man)

    # Transform all h3 options, environment var and exit code to tables.
    # The section headers are looked up after normalize_h2() has run, so the
    # titles compared here are the normalized ones.
    options_h2 = man.find_first(
        lambda it: isinstance(it, Header) and it.title == "Options"
    )
    environment_h2 = man.find_first(
        lambda it: isinstance(it, Header) and it.title == "Environment"
    )
    exit_codes_h2 = man.find_first(
        lambda it: isinstance(it, Header) and it.title == "Exit Codes"
    )
    www_h2 = man.find_first(lambda it: isinstance(it, Header) and it.title == "WWW")

    # Each section's items run from its first h3 up to the next section header.
    first_option_h3 = man.find_first(
        lambda it: isinstance(it, Header) and it.level == 3, start=options_h2
    )
    options = man.slice(first_option_h3, environment_h2)
    process_table(doc=man, nodes=options, col_name="Option")

    first_env_h3 = man.find_first(
        lambda it: isinstance(it, Header) and it.level == 3, start=environment_h2
    )
    envs = man.slice(first_env_h3, exit_codes_h2)
    process_table(doc=man, nodes=envs, col_name="Variable")

    first_exit_h3 = man.find_first(
        lambda it: isinstance(it, Header) and it.level == 3, start=exit_codes_h2
    )
    exits = man.slice(first_exit_h3, www_h2)
    process_table(doc=man, nodes=exits, col_name="Value")

    print("# Manual\n\n" + man.to_text())


# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effect.
if __name__ == "__main__":
    main()
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00			`#!/usr/bin/env python3`
Changing to manual vs man in documenation In intro documentation, it can be confusing to see just `man` for the reference to reference manual pages and documentation. I believe this is a reference to Unix man pages (manual pages), but markdown is much more than what those ASCII text displays could do. To help understandability, I am submitting a PR to update some documentation references to manual over man. 2022-08-31 19:14:45 +03:00			`"""Build Grammar Markdown Manual File.`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
Changing to manual vs man in documenation In intro documentation, it can be confusing to see just `man` for the reference to reference manual pages and documentation. I believe this is a reference to Unix man pages (manual pages), but markdown is much more than what those ASCII text displays could do. To help understandability, I am submitting a PR to update some documentation references to manual over man. 2022-08-31 19:14:45 +03:00			`This script converts Hurl manual file to Markdown suitable for the Hurl canonical docs.`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
Changing to manual vs man in documenation In intro documentation, it can be confusing to see just `man` for the reference to reference manual pages and documentation. I believe this is a reference to Unix man pages (manual pages), but markdown is much more than what those ASCII text displays could do. To help understandability, I am submitting a PR to update some documentation references to manual over man. 2022-08-31 19:14:45 +03:00			`This tool takes the Hurl manual file as a first argument.`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
			`Examples:`
Rename manual-page.md to manual.md 2022-09-02 15:45:54 +03:00			`$ python3 bin/docs/build_man_md.py docs/manual/hurl.md > docs/manual.md`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
			`"""`
			`import re`
			`import sys`
			`from pathlib import Path`
			`from typing import List`

			`from markdown import (`
			`parse_markdown,`
			`MarkdownDoc,`
			`Header,`
			`Paragraph,`
			`Whitespace,`
			`Node,`
			`)`


			`def normalize_h2(doc: MarkdownDoc) -> None:`
			`h2s = [h for h in doc.children if isinstance(h, Header) and h.level == 2]`
			`for h2 in h2s:`
			`# Add exception for www acronym`
			`if h2.title == "WWW":`
			`continue`
			`h2.title = h2.title.title()`
			`h2.update_content()`


			`def process_table(doc: MarkdownDoc, nodes: List[Node], col_name: str) -> None:`
Changing to manual vs man in documenation In intro documentation, it can be confusing to see just `man` for the reference to reference manual pages and documentation. I believe this is a reference to Unix man pages (manual pages), but markdown is much more than what those ASCII text displays could do. To help understandability, I am submitting a PR to update some documentation references to manual over man. 2022-08-31 19:14:45 +03:00			`"""Transform the list of items from the source manual document to a beautiful HTML tables.`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
			`This can be used to transform options, variables and environment sections.`
			`"""`

			`def escape(s):`
			`return s.replace("<", "<").replace(">", ">")`

			`new_nodes = [`
			`Whitespace(content="\n"),`
Update docs for Hurl 2.0.0. 2022-12-19 23:30:08 +03:00			`Paragraph(content=f"\| {col_name} \| Description \|\n\| --- \| --- \|\n"),`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00			`]`

			`h3s = [n for n in nodes if isinstance(n, Header)]`
			`for h3 in h3s:`
			`name_raw = h3.title`

			`# Try to match name and anchor`
			`r = re.compile(r"""(.+) \{#(.+)}""")`
			`m = r.match(name_raw)`
			`if m:`
			`_id = m.group(2)`
			`text = escape(m.group(1))`
			`name = f'<a href="#{_id}" id="{_id}"><code>{text}</code></a>'`
			`else:`
			name = f"`{name_raw}`"

			`next_h = doc.find_first(`
			`lambda it: isinstance(it, Header), start=doc.next_node(h3)`
			`)`
			`first_p = doc.find_first(`
			`lambda it: isinstance(it, Paragraph), start=doc.next_node(h3)`
			`)`
			`assert first_p is not None`
			`last_p = next_h`
			`while last_p and not isinstance(last_p, Paragraph):`
			`last_p = doc.previous_node(last_p)`
			`assert last_p is not None`
			`next_node = doc.next_node(last_p)`
			`assert next_node is not None`
			`paragraphs = doc.slice(first_p, next_node)`
			`paragraphs_contents = [p.content for p in paragraphs if p.content]`
			`description = "".join(paragraphs_contents)`
Update docs for Hurl 2.0.0. 2022-12-19 23:30:08 +03:00			`description = description.replace("\n", "<br>")`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00
Update docs for Hurl 2.0.0. 2022-12-19 23:30:08 +03:00			`new_node = Paragraph(content=f"\| {name} \| {description} \|\n")`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00			`new_nodes.append(new_node)`

			`# Delete all previous options:`
			`previous_node = doc.previous_node(nodes[0])`
			`assert previous_node is not None`
			`doc.insert_nodes(start=previous_node, nodes=new_nodes)`
			`doc.remove_nodes(nodes)`


			`def main():`
			`input_file = sys.argv[1]`
			`src = Path(input_file).read_text()`

			`man = parse_markdown(text=src)`

			`normalize_h2(man)`

			`# Transform all h3 options, environment var and exit code to tables`
			`options_h2 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.title == "Options"`
			`)`
			`environment_h2 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.title == "Environment"`
			`)`
			`exit_codes_h2 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.title == "Exit Codes"`
			`)`
			`www_h2 = man.find_first(lambda it: isinstance(it, Header) and it.title == "WWW")`

			`first_option_h3 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.level == 3, start=options_h2`
			`)`
			`options = man.slice(first_option_h3, environment_h2)`
			`process_table(doc=man, nodes=options, col_name="Option")`

			`first_env_h3 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.level == 3, start=environment_h2`
			`)`
			`envs = man.slice(first_env_h3, exit_codes_h2)`
			`process_table(doc=man, nodes=envs, col_name="Variable")`

			`first_exit_h3 = man.find_first(`
			`lambda it: isinstance(it, Header) and it.level == 3, start=exit_codes_h2`
			`)`
			`exits = man.slice(first_exit_h3, www_h2)`
			`process_table(doc=man, nodes=exits, col_name="Value")`

Changing naming from Manual Page to just Manual Where it made sense in the titles, and references in the documentation, I have updated to just `Manual` over `Manual Page` for clarity on the purpose of the section or documentation. I have left the file naming alone, since that was mostly used in the links and felt like the titles might be where more confusion could be at. 2022-09-02 01:29:23 +03:00			`print("# Manual\n\n" + man.to_text())`
Add scripts to construct GitHub and crates.io README, and to construct grammar and man canonical document from the source. 2022-05-31 15:37:58 +03:00

			`if __name__ == "__main__":`
			`main()`