Add script for standalone Markdown generation.

2024-11-23 00:44:55 +03:00 · 2024-09-14 10:57:33 +02:00 · 2024-09-14 10:57:33 +02:00 · ce2e2e9095
commit ce2e2e9095
parent 50a5b7567f
7 changed files with 13336 additions and 15 deletions
--- a/bin/docs/build_standalone_md.py
+++ b/bin/docs/build_standalone_md.py
@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+"""
+Build a standalone Markdown file of all the documentation. All links and anchors are rewritten so the
+links stay functional: when files are concatenated, the script ensures that each anchor is
+specific to a given page. "The essential thing is that it works": this script does its job, but it may
+not be easy to maintain.
+
+Examples:
+    $ python3 bin/docs/build_standalone_md.py > docs/standalone/hurl-5.0.1.md
+"""
+import os
+import re
+import sys
+import unicodedata
+
+import markdown
+from markdown import MarkdownDoc, Paragraph, RefLink, Header, Whitespace, Table, Node
+from pathlib import Path
+
+
+def add_section_header(doc: MarkdownDoc, title: str):
+    """Append a level-1 section header, followed by a newline, to a Markdown document.
+
+    The header anchor id is derived from the title alone (no page prefix).
+    """
+    header = Header(title=title, level=1)
+    add_header_id(header=header, prefix=None)
+    newline = Whitespace(content="\n")
+    for child in (header, newline):
+        doc.add_child(child)
+
+
+def add_sections(doc: MarkdownDoc, title: str | None, files: list[str]):
+    """Append a section to a Markdown document by concatenating a list of files.
+
+    Args:
+        doc: the document that receives the section, modified in place
+        title: the section title; when set, a level-1 section header is added first and the
+            title is part of the anchor prefix of every page of the section
+        files: paths of the Markdown files to concatenate, in order
+    """
+    if title:
+        add_section_header(doc=doc, title=title)
+
+    for file in files:
+        sys.stderr.write(f">>> Processing <{file}>...\n")
+        path = Path(file)
+        # Decode explicitly as UTF-8 (assumed encoding of the docs) instead of relying on the
+        # locale's default encoding, which differs between platforms.
+        text = path.read_text(encoding="utf-8")
+        file_md = markdown.parse_markdown(text=text)
+        file_md.indent()
+
+        # All ref links (https://daringfireball.net/projects/markdown/syntax) are inlined so we can
+        # concatenate multiple documents without any problem
+        #
+        # Before:
+        # ```markdown
+        # Some bla bla [a reference][ref]
+        # [ref]: https://foo.com
+        # ```
+        #
+        # After:
+        # ```markdown
+        # Some bla bla [a reference](https://foo.com)
+        # ```
+        inline_ref_link(md=file_md)
+
+        # Anchors are normalized so we can concatenate multiple documents that have the same anchors
+        #
+        # Before:
+        # ```markdown
+        # Some bla bla [a reference](#anchor)
+        # ```
+        #
+        # After:
+        # ```markdown
+        # Some bla bla [a reference](#name-of-the-document-anchor)
+        # ```
+        #
+        # Without a section title, the prefix is built from the page name only (formatting a
+        # `None` title would produce a "none-<page>" prefix).
+        anchors_prefix = slugify(f"{title} {path.stem}" if title else path.stem)
+        rewrite_links(md=file_md, prefix=anchors_prefix)
+
+        # Visually separate each page from the next one.
+        hr = Paragraph(content="\n\n<hr>\n\n")
+        file_md.add_child(hr)
+
+        doc.extend(file_md)
+
+
+def add_header_id(header: Header, prefix: str | None):
+    """Set the anchor id of a header and refresh its rendered content.
+
+    The id is the slug of the header title, preceded by `prefix` and a dash when a prefix is given.
+    Example: `# Some title` => `# Some title {#a-prefix-some-title}`
+    """
+    title_slug = slugify(header.title)
+    header.id = f"{prefix}-{title_slug}" if prefix else title_slug
+    header.update_content()
+
+
+def slugify(text: str) -> str:
+    """Return a lowercase ASCII slug of a text.
+
+    Runs of dashes and whitespace are collapsed into one dash, slashes are removed.
+    """
+    # Decompose accented characters, then drop everything that is not ASCII.
+    decomposed = unicodedata.normalize("NFKD", text)
+    ascii_text = decomposed.encode("ascii", "ignore").decode("ascii")
+    # Keep only word characters, whitespace, slashes and dashes.
+    cleaned = re.sub(r"[^\w\s/-]", "", ascii_text)
+    lowered = cleaned.strip().lower()
+    dashed = re.sub(r"[-\s]+", "-", lowered)
+    return dashed.replace("/", "")
+
+
+def section_from_page(page: str) -> str:
+    """Return the title of the section that contains a page, ex: "manual.md" => "Getting Started".
+
+    Pages that are not explicitly listed are considered part of the "File Format" section.
+    """
+    sections = {
+        "home.md": "Introduction",
+        "license.md": "Resources",
+        "installation.md": "Getting Started",
+        "manual.md": "Getting Started",
+        # The samples page is `samples.md` (plural): with `sample.md`, links to the samples page
+        # were resolved to the "File Format" section.
+        "samples.md": "Getting Started",
+        "running-tests.md": "Getting Started",
+        "frequently-asked-questions.md": "Getting Started",
+    }
+    return sections.get(page, "File Format")
+
+
+def rewrite_links(md: MarkdownDoc, prefix: str):
+    """Make the headers and links of a page unique, before the page is merged with other pages.
+
+    When multiple Markdown documents are concatenated, anchors of different pages can overlap and
+    links between pages must become links inside the merged document.
+
+    Args:
+        md: the page to rewrite, modified in place
+        prefix: slug prepended to every anchor of the page
+    """
+
+    def repl_page_anchor(match_obj):
+        """`[Foo](#anchor)` => `[Foo](#current-page-anchor)`"""
+        title = match_obj.group("title")
+        anchor = match_obj.group("anchor")
+        return f"[{title}](#{prefix}-{anchor})"
+
+    def repl_docs_link(match_obj):
+        """`[Foo](/docs/some-page.md#anchor)` => `[Foo](#section-some-page-anchor)`"""
+        old = match_obj.group(0)
+        title = match_obj.group("title")
+        page = match_obj.group("page")
+        section = slugify(section_from_page(page))
+        page = page[:-3]  # Remove .md extension
+        anchor = match_obj.group("anchor")
+        if anchor:
+            _id = f"#{section}-{page}-{anchor}"
+        else:
+            _id = f"#{section}-{page}"
+        new = f"[{title}]({_id})"
+        sys.stderr.write(f"Replace `{old}` to `{new}`\n")
+        return new
+
+    def repl_manual_anchor(match_obj):
+        """`<a href="#aws-sigv4" id="aws-sigv4">` => same tag, with prefixed href and id"""
+        href = match_obj.group("href")
+        _id = match_obj.group("_id")
+        if href != _id:
+            # Not a self-referencing anchor: keep the tag untouched. The matched text is returned
+            # as is, because rebuilding it from the groups would lose the `#` of the href.
+            return match_obj.group(0)
+        return f'<a href="#{prefix}-{href}" id="{prefix}-{_id}">'
+
+    # Find all headers and add an id specific to the page
+    # `# Some title` => `# Some title {#prefix-some-title}`
+    headers = [c for c in md.children if isinstance(c, Header)]
+    for header in headers:
+        add_header_id(header, prefix=prefix)
+
+    # Rewrite the links of paragraphs and tables: first the anchors of the current page, then the
+    # links to other pages (once rewritten, these no longer match the first pattern).
+    nodes = [c for c in md.children if isinstance(c, (Paragraph, Table))]
+    for node in nodes:
+        node.content = re.sub(
+            r"\[(?P<title>.+?)]\(#(?P<anchor>.+?)\)", repl_page_anchor, node.content
+        )
+        node.content = re.sub(
+            r"\[(?P<title>.+?)]\(/docs/(?P<page>[a-zA-Z0-9-/]+?\.md)#?(?P<anchor>[a-z0-9-]+?)?\)",
+            repl_docs_link,
+            node.content,
+        )
+
+    # Replace Manual links
+    # `<a href="#aws-sigv4" id="aws-sigv4">`
+    tables = [c for c in md.children if isinstance(c, Table)]
+    for table in tables:
+        table.content = re.sub(
+            r"<a href=\"#(?P<href>.+?)\" id=\"(?P<_id>.+?)\">",
+            repl_manual_anchor,
+            table.content,
+        )
+        table.reformat()
+
+
+def inline_ref_link(md: MarkdownDoc):
+    """Inline the ref links of the paragraphs of a document, then remove the ref link nodes.
+
+    As documents are merged, we do not want to have ref links in the middle of the final document.
+    """
+    paragraphs = [child for child in md.children if isinstance(child, Paragraph)]
+    definitions = [child for child in md.children if isinstance(child, RefLink)]
+
+    def find_definition(ref):
+        """Return the first ref link node with the given ref, or None."""
+        for definition in definitions:
+            if definition.ref == ref:
+                return definition
+        return None
+
+    def inline(match_obj):
+        """Turn `[ref]` into `[ref](url)`; refs without definition are left as `[ref]`."""
+        ref = match_obj.group("ref")
+        definition = find_definition(ref)
+        if not definition:
+            sys.stderr.write(f"No ref for [{ref}]\n")
+            return f"[{ref}]"
+        url = definition.link.strip()
+        inlined = f"[{ref}]({url})"
+        sys.stderr.write(f"Inline `[{ref}]` to `{inlined}`\n")
+        return inlined
+
+    # Inline ref links
+    for paragraph in paragraphs:
+        paragraph.content = re.sub(r"\[(?P<ref>.+?)]", inline, paragraph.content)
+
+    # Delete ref links
+    md.remove_nodes(definitions)
+
+
+def main() -> int:
+    """Build the standalone documentation and print it on the standard output."""
+    # Sections of the documentation, in reading order: (title, pages).
+    sections = [
+        ("Introduction", ["docs/home.md"]),
+        (
+            "Getting Started",
+            [
+                "docs/installation.md",
+                "docs/manual.md",
+                "docs/samples.md",
+                "docs/running-tests.md",
+                "docs/frequently-asked-questions.md",
+            ],
+        ),
+        (
+            "File Format",
+            [
+                "docs/hurl-file.md",
+                "docs/entry.md",
+                "docs/request.md",
+                "docs/response.md",
+                "docs/capturing-response.md",
+                "docs/asserting-response.md",
+                "docs/filters.md",
+                "docs/templates.md",
+                "docs/grammar.md",
+            ],
+        ),
+        ("Resources", ["docs/license.md"]),
+    ]
+
+    standalone_md = MarkdownDoc()
+    for section_title, section_files in sections:
+        add_sections(doc=standalone_md, title=section_title, files=section_files)
+
+    # Make the cover: the table of contents is computed before the cover headers are inserted,
+    # so they are not part of it.
+    toc = Paragraph(content=standalone_md.toc())
+    cover = [
+        Header(title="Hurl Documentation", level=1),
+        Whitespace(content="\n"),
+        Header(title="Version 5.0.1 - 18/09/2024", level=2),
+        Whitespace(content="\n"),
+        toc,
+    ]
+    standalone_md.children[0:0] = cover
+
+    print(rewrite_content(standalone_md.to_text()))
+    return os.EX_OK
+
+
+def rewrite_content(text: str) -> str:
+    """Apply a list of hardcoded replacements to the final document."""
+    # (old, new) pairs, applied one after the other in this order.
+    replacements = [
+        ("/docs/assets/img/", "https://hurl.dev/assets/img/"),
+        ('<div id="home-demo"></div>', ""),
+        ("[Blog](blog.md)", "[Blog](https://hurl.dev/blog)"),
+        (
+            "[Tutorial](#file-format-tutorial/your-first-hurl-file)",
+            "[Tutorial](https://hurl.dev/docs/tutorial/your-first-hurl-file.html)",
+        ),
+        (
+            "[Documentation](#getting-started-installation)",
+            "[Documentation](https://hurl.dev)",
+        ),
+        (
+            " (download [HTML](/docs/standalone/hurl-5.0.1.html), [PDF](/docs/standalone/hurl-5.0.1.pdf), [Markdown](/docs/standalone/hurl-5.0.1.md))",
+            "",
+        ),
+        ("/docs/asserting-response.html#", "#file-format-asserting-response-"),
+        (
+            '<a href="/docs/capturing-response.html">',
+            '<a href="#file-format-capturing-response-capturing-response">',
+        ),
+        (
+            '<a href="#method">Method</a>',
+            '<a href="#file-format-request-method">Method</a>',
+        ),
+        ('<a href="#url">URL</a>', '<a href="#file-format-request-url">URL</a>'),
+        (
+            '<a href="#headers">HTTP request headers</a>',
+            '<a href="#file-format-request-headers">HTTP request headers</a>',
+        ),
+        (
+            '<a href="#query-parameters">Query strings</a>',
+            '<a href="#file-format-request-query-parameters">Query strings</a>',
+        ),
+        (
+            '<a href="#form-parameters">form params</a>',
+            '<a href="#file-format-request-form-parameters">form params</a>',
+        ),
+        (
+            '<a href="#cookies">cookies</a>',
+            '<a href="#file-format-request-cookies">cookies</a>',
+        ),
+        (
+            '<a href="#basic-authentication">authentication</a>',
+            '<a href="#file-format-request-basic-authentication">authentication</a>',
+        ),
+        (
+            '<a href="#body">HTTP request body</a>',
+            '<a href="#file-format-request-body">HTTP request body</a>',
+        ),
+    ]
+    rewritten = text
+    for old, new in replacements:
+        rewritten = rewritten.replace(old, new)
+    return rewritten
+
+
+if __name__ == "__main__":
+    # Propagate the status returned by main() to the calling shell instead of discarding it.
+    sys.exit(main())
--- a/bin/docs/markdown.py
+++ b/bin/docs/markdown.py
@ -42,7 +42,7 @@ class Whitespace(Node):
    pass


-def build_header(title: str, level: int) -> str:
+def build_header(title: str, level: int, _id: str | None) -> str:
    """Constructs a header in Markdown format.

    Arg:
@ -50,7 +50,10 @@ def build_header(title: str, level: int) -> str:
        level: 1 base index of the header level
    """
    hashes = "#" * level
-    return f"{hashes} {title}\n"
+    if _id:
+        return f"{hashes} {title} {{#{_id}}}\n"
+    else:
+        return f"{hashes} {title}\n"


 class Header(Node):
@ -58,11 +61,13 @@ class Header(Node):

    title: str
    level: int
+    _id: str | None

-    def __init__(self, title: str, level: int) -> None:
+    def __init__(self, title: str, level: int, _id: str = None) -> None:
        super().__init__(content=None)
        self.title = title
        self.level = level
+        self._id = _id
        self.update_content()

    def indent(self, count: int) -> None:
@ -75,7 +80,15 @@ class Header(Node):
        self.update_content()

    def update_content(self) -> None:
-        self.content = build_header(title=self.title, level=self.level)
+        self.content = build_header(title=self.title, level=self.level, _id=self._id)
+
+    @property
+    def id(self) -> str | None:
+        return self._id
+
+    @id.setter
+    def id(self, value: str | None):
+        self._id = value


 class RefLink(Node):
@ -198,6 +211,19 @@ def parse_code(parser: Parser) -> Code:
    return Code(content=content)


+def parse_table(parser: Parser) -> Table:
+    """Parse and return a table token.
+
+    Lines are read one by one, until the input is exhausted or the character peeked after a
+    line is not `|`.
+    """
+    rows = []
+    while parser.left() > 0:
+        row = parser.read_while(lambda it: it != "\n")
+        parser.read()  # Consume the character that stopped the read (presumably the newline)
+        rows.append(row + "\n")
+        if parser.peek() != "|":
+            break
+    return Table(content="".join(rows))
+
+
 def parse_header(parser: Parser) -> Header:
    """Parse and return a header token."""
    hashes = parser.read_while(lambda it: it == "#")
@ -255,6 +281,11 @@ def parse_markdown(text: str) -> "MarkdownDoc":
                root.add_child(node)
                continue

+        if c == "|":
+            node = parse_table(parser=parser)
+            root.add_child(node)
+            continue
+
        # Default node parsing:
        node = parse_paragraph(parser=parser)
        root.add_child(node)
@ -312,12 +343,12 @@ class MarkdownDoc:
        self.children.extend(other.children)

    def insert_node(self, start: Node, node: Node) -> None:
-        """Insert a child node to the current document, after a specified node."""
+        """Insert a child node to the current document, before a specified node."""
        index = self.children.index(start)
        self.children.insert(index, node)

    def insert_nodes(self, start: Node, nodes: List[Node]) -> None:
-        """Insert children nodes to the current document, after a specified node."""
+        """Insert children nodes to the current document, before a specified node."""
        index = self.children.index(start)
        self.children[index:index] = nodes

@ -373,15 +404,23 @@ class MarkdownDoc:
            return re.sub(r"[-\s]+", "-", value).replace("/", "")

        headers = [child for child in self.children if isinstance(child, Header)]
+
+        # Find the minimum header level, we'll delta all documents level from this
+        min_level = min([h.level for h in headers])
+
        toc = dedent(
            """\
-        Table of Contents
-        =================
+        # Table of Contents
+        
        """
        )
        for header in headers:
-            indent = "   " * header.level
-            slug = slugify(header.title)
+            indent = "    " * (header.level - min_level)
+            if header.id:
+                slug = header.id
+            else:
+                slug = slugify(header.title)
            line = f"{indent}* [{header.title}](#{slug})\n"
            toc += line
+        toc += "\n"
        return toc
--- a/docs/home.md
+++ b/docs/home.md
@ -174,7 +174,7 @@ HTTP 200

 [Tutorial]

-[Documentation]
+[Documentation] (download [HTML], [PDF], [Markdown]) 

 [GitHub]

@ -191,3 +191,6 @@ HTTP 200
 [GitHub]: https://github.com/Orange-OpenSource/hurl
 [libcurl]: https://curl.se/libcurl/
 [star Hurl on GitHub]: https://github.com/Orange-OpenSource/hurl/stargazers
+[HTML]: /docs/standalone/hurl-5.0.1.html
+[PDF]: /docs/standalone/hurl-5.0.1.pdf
+[Markdown]: /docs/standalone/hurl-5.0.1.md
--- a/docs/samples.md
+++ b/docs/samples.md
@ -17,7 +17,7 @@ oriented output, you can use [`--test` option]:
 $ hurl --test sample.hurl
 ```

-A particular response can be saved with [`[Options] section`][option]:
+A particular response can be saved with [`[Options] section`](/docs/request.md#options):

 ```hurl
 GET https://example.ord/cats/123
@ -766,7 +766,7 @@ Action: GetCallerIdentity
 Version: 2011-06-15
 ```

-The Access Key is given per [`--user`], either with command line option or within the [`[Options]`][option] section:
+The Access Key is given per [`--user`], either with command line option or within the [`[Options]`](/docs/request.md#options) section:

 ```hurl
 POST https://sts.eu-central-1.amazonaws.com/
@ -789,7 +789,7 @@ to each request of an Hurl file.
 $ hurl --resolve foo.com:8000:127.0.0.1 foo.hurl
 ```

-Use  [`[Options]` section][option] to configure a specific request:
+Use  [`[Options]` section](/docs/request.md#options) to configure a specific request:

 ```hurl
 GET http://bar.com
@ -824,7 +824,6 @@ HTTP 200
 [Hurl templates]: /docs/templates.md
 [AWS Signature Version 4]: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html
 [Captures]: /docs/capturing-response.md
-[option]: /docs/request.md#options
 [`--json` option]: /docs/manual.md#json
 [`--resolve`]: /docs/manual.md#resolve
 [`--connect-to`]: /docs/manual.md#connect-to
--- a/docs/standalone/hurl-5.0.1.html
+++ b/docs/standalone/hurl-5.0.1.html
--- a/docs/standalone/hurl-5.0.1.md
+++ b/docs/standalone/hurl-5.0.1.md
--- a/docs/standalone/hurl-5.0.1.pdf
+++ b/docs/standalone/hurl-5.0.1.pdf