Create directory index - Githubissues

I have implemented a workaround for this case, if anyone is interested:
import os
import re
import typing
import tempfile
import functools

# python -m pip install grip
# python -m pip install marko (for parsing Markdown files)
# python -m pip install dotenv (optional, for reading GitHub tokens)
import grip
import dotenv
import marko.block
import marko.parser
import marko.md_renderer

# Captures the `href` attribute of an opening `a` tag, whatever other
# attributes surround it: group 1 is everything up to and including the
# opening quote, group 2 the link target, group 3 the closing quote.
_A_HREF = re.compile(r'(<a\s(?:[^>]*?\s)?href=")([^"]*)(")', re.IGNORECASE)


def _html_dest(url: str) -> str | None:
    """Return *url* retargeted from ``.md`` to ``.html``, or None to keep it.

    A trailing ``#fragment`` is preserved. URLs containing ``://`` are left
    alone, since they do not point into the directory being converted.
    """
    path, sep, fragment = url.partition("#")
    if "://" in path or not path.endswith(".md"):
        return None
    return f"{path.removesuffix('.md')}.html{sep}{fragment}"


def _relink_href(found: re.Match[str]) -> str:
    """Rebuild an `_A_HREF` match with its target retargeted when applicable."""
    head, url, tail = found.groups()
    updated_url = _html_dest(url)
    return f"{head}{url if updated_url is None else updated_url}{tail}"


def md_to_htmlmd(fpath: str) -> marko.block.Document:
    """Convert all references to Markdown paths to HTML paths in a Markdown document.

    Suppose a Markdown file *main.md* has a link that redirects to
    *some/path/file.md*. If we convert *file.md* to HTML, changing both its
    contents and file extension, the reference on *main.md* would fail. Therefore,
    we must change this reference to *some/path/file.html* instead.

    This function affects Markdown links (`[foo](bar)`) and inline HTML links
    (`a` tags) anywhere in the document tree, not only in top-level
    paragraphs. The file at *fpath* is only read; the changes are made on
    the parsed document that is returned.

    Arguments:
    fpath -- the path of the Markdown file to be relinked

    Returns the parsed document with its link targets rewritten.
    """
    parser = marko.parser.Parser()
    with open(fpath, encoding="utf-8") as f:
        document = parser.parse(f.read())

    # Walk the whole tree with an explicit stack. Visiting order is
    # irrelevant because every node is rewritten independently.
    pending: list[typing.Any] = [document]
    while pending:
        node = pending.pop()

        if isinstance(node, marko.inline.Link):
            updated_url = _html_dest(node.dest)
            if updated_url is not None:
                node.dest = updated_url

        elif isinstance(node, marko.inline.InlineHTML):
            # Inline HTML keeps its raw text in `children`, so there is
            # nothing to descend into.
            html: str = typing.cast(str, node.children)
            node.children = _A_HREF.sub(_relink_href, html)
            continue

        # Some elements have no `children`, and leaf elements store a plain
        # string there; only lists hold further nodes.
        children = getattr(node, "children", None)
        if isinstance(children, list):
            pending.extend(children)

    return document

# A directory: maps each entry name to its `Node`.
Tree: typing.TypeAlias = dict[str, "Node"]
# A directory entry: a nested `Tree` for a subdirectory, or None for a file.
Node: typing.TypeAlias = Tree | None

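# In a tree, a `None` value marks a Markdown file and a nested dict marks a
# directory. File keys keep their ".md" extension, because `parse` opens
# `dpath/key` as the input file. The following object
#
# data: Tree = {
#     "assets": {f"{name}.md": None for name in ("alfa", "bravo", "charlie", "delta")},
#     # Equivalent to `dict.fromkeys(("alfa.md", "bravo.md", "charlie.md", "delta.md"))`
#
#     "project": {
#         "lib": dict.fromkeys(("i.md", "ii.md", "iii.md", "iv.md")),
#         "i": dict.fromkeys(map("{:02d}.md".format, range(3))),
#         "ii": dict.fromkeys(map("{:02d}.md".format, range(3))),
#         "iii": dict.fromkeys(map("{:02d}.md".format, range(3))),
#         "iv": dict.fromkeys(map("{:02d}.md".format, range(3))),
#         "README.md": None,
#     },
#
#     "README.md": None,
# }
#
# makes `parse` generate the following directory structure under *static/*
# (the *index.html* written into every directory is omitted):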
#
# .
# |_ assets/
# |   |_ alfa.html
# |   |_ bravo.html
# |   |_ charlie.html
# |   |_ delta.html
# |_ project/
# |   |_ lib/
# |   |   |_ i.html
# |   |   |_ ii.html
# |   |   |_ iii.html
# |   |   |_ iv.html
# |   |_ i/
# |   |   |_ 00.html
# |   |   |_ 01.html
# |   |   |_ 02.html
# |   |_ ...
# |   |_ iv/
# |   |   |_ 00.html
# |   |   |_ 01.html
# |   |   |_ 02.html
# |   |_ README.html
# |_ README.html

def parse(dpath: str, tree: Tree, *, force: bool = False) -> None:
    """Convert a Markdown-based directory to a HTML-based directory.

    This function will

    - create a *static/* folder on the current working directory to
      include all the generated HTML files and their directory listing.
      The Markdown files are _not_ affected.

    - create a *index.html* on directories describing their contents.
      The generated layout is based on Edge's directory listing, but you
      can change it on the last lines of this function.

    - convert all Markdown files to HTML files, including their inner
      references to other Markdown files.

    The GitHub token is read from the ``GRIP_GITHUB_TOKEN`` environment
    variable; when it is not set, ``None`` is passed to Grip instead.

    Arguments:
    dpath -- a path to a directory in the system
    tree  -- the tree of contents of this directory
    force -- if False, this function will not generate a new HTML file for
             a MD file if there is already one on *static*.
    """
    # Create the output directory up front: a directory that is empty, or
    # whose files are all skipped, still gets an index.html written below.
    out_dpath = os.path.join("static", dpath)
    os.makedirs(out_dpath, exist_ok=True)

    fnames: list[str] = []
    for key, subtree in tree.items():
        if subtree is not None:
            # `key` refers to a directory path: convert it recursively
            parse(os.path.join(dpath, key), subtree, force=force)
            fnames.append(f"{key}/")
            continue

        # `key` refers to a file path
        in_fpath = os.path.join(dpath, key)
        stem, _ = os.path.splitext(in_fpath)
        out_fpath = os.path.join("static", f"{stem}.html")
        fnames.append(os.path.basename(out_fpath))

        if not force and os.path.isfile(out_fpath):
            continue

        document = md_to_htmlmd(in_fpath)
        os.makedirs(os.path.dirname(out_fpath), exist_ok=True)

        # Creates a temporary file to store the new Markdown file
        # with inner Markdown references changed to HTML references.
        # `delete=False` keeps it on disk after closing so Grip can read it
        # by name; the `finally` clause removes it even if rendering fails.
        out_f = tempfile.NamedTemporaryFile("w+", encoding="utf-8", delete=False)
        try:
            with out_f:
                with marko.md_renderer.MarkdownRenderer() as renderer:
                    out_f.write(renderer.render(document))

            # Customize your Grip transformation here
            grip.export(
                title=os.path.basename(in_fpath),
                path=out_f.name,
                out_filename=out_fpath,
                password=os.environ.get("GRIP_GITHUB_TOKEN"),
            )
        finally:
            os.remove(out_f.name)

    # Creates the directory listing with the filenames obtained when iterating
    title = f"Index of {os.path.relpath(dpath).removesuffix('.')}\\"
    lines = [
        "<!DOCTYPE HTML>",
        '<html lang="pt">',
        "<head>",
        '<meta charset="utf-8">',
        f"<title>{title}</title>",
        "</head>",
        "<body>",
        f"<h1>{title}</h1>",
        "<hr>",
        "<ul>",
    ]
    # This sorting sorts by directories first, then by name
    for fname in sorted(fnames, key=lambda v: (not v.endswith("/"), v)):
        lines.append(f'<li><a href="{fname}">{fname}</a></li>')
    lines += ["</ul>", "<hr>", "</body>", "</html>"]

    with open(os.path.join(out_dpath, "index.html"), "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")

def main() -> None:
    """Build the tree of Markdown files under the current directory and convert it.

    Every directory found by `os.walk` becomes a nested dict and every file
    ending in ".md" becomes a `None` entry; the resulting tree is then
    handed to `parse`. The top-level *static* directory is skipped because
    it is where `parse` writes its output.
    """
    # Load environment variables from .env (optional)
    dotenv.load_dotenv()

    data: Tree = {}
    for root, dnames, fnames in os.walk("."):
        # Iterate recursively over all directories and files
        # in the current directory searching for Markdown files.
        if root == ".":
            # Prune the output directory in place so `os.walk` does not
            # descend into it; otherwise each run would mirror the previous
            # output into static/static/...
            dnames[:] = [dname for dname in dnames if dname != "static"]

        # Descend to the subtree of `root`; the first component is always
        # ".", which corresponds to `data` itself.
        child_data: Tree = data
        for key in root.split(os.path.sep)[1:]:
            child_data = child_data[key]

        for dname in dnames:
            child_data[dname] = {}

        for fname in fnames:
            if fname.endswith(".md"):
                child_data[fname] = None

    parse(".", data)

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
joeyespo / grip

Create directory index #303