joeyespo / grip

Preview GitHub README.md files locally before committing them.
MIT License
6.42k stars 423 forks source link

Create directory index #303

Open filips123 opened 5 years ago

filips123 commented 5 years ago

Can you add support for serving a directory, which would allow viewing all of its files? Files should be converted to HTML automatically, but keep their original filename and extension.

wookayin commented 4 years ago

+1 This would be super useful

enzo-santos commented 11 months ago

I have implemented a workaround for this case, if anyone is interested:

import functools
import html
import os
import re
import tempfile
import typing
import urllib.parse

# python -m pip install grip
# python -m pip install marko (for parsing Markdown files)
# python -m pip install python-dotenv (optional, for reading GitHub tokens from a .env file)
import grip
import dotenv
import marko.block
import marko.parser
import marko.md_renderer

def md_to_htmlmd(fpath: str) -> marko.block.Document:
    """Convert all references to Markdown paths to HTML paths in a Markdown document.

    Suppose a Markdown file *main.md* has a link that redirects to 
    *some/path/file.md*. If we convert *file.md* to HTML, changing both its 
    contents and file extension, the reference on *main.md* would fail. Therefore,
    we must change this reference to *some/path/file.html* instead.

    This functions affects Markdown links (`(foo)[bar]`) and HTML links (`a` tags).

    Arguments:
    fpath -- the path of the Markdown file to be relinked
    """
    parser = marko.parser.Parser()
    with open(fpath, encoding="utf-8") as f:
        document = parser.parse(f.read())

    for node in document.children:
        match node:
            case marko.block.Paragraph():
                for child_node in node.children:
                    match child_node:
                        case marko.inline.Link():
                            url: str = child_node.dest
                            text: str = child_node.children[0].children
                            if not url.endswith(".md"):
                                continue

                            updated_url, _ = url.rsplit(".", 1)
                            updated_url = f"{updated_url}.html"
                            child_node.dest = updated_url

                        case marko.inline.InlineHTML():
                            html: str = typing.cast(str, child_node.children)

                            if not (match := re.match(r'<a href="(.+\.md)">', html)):
                                continue

                            url = match.group(1)
                            updated_url, _ = url.rsplit(".", 1)
                            updated_url = f"{updated_url}.html"
                            child_node.children = f'<a href="{updated_url}">'

    return document

# Contents of a directory: maps each entry name to its node.
Tree: typing.TypeAlias = dict[str, "Node"]
# A single entry: a nested `Tree` for a directory, or `None` for a file.
Node: typing.TypeAlias = Tree | None

# The following object
#
# data: Tree = {
#     "assets": {fname: None for fname in ("alfa", "bravo", "charlie", "delta")},
#     # Equivalent to `dict.fromkeys(("alfa", "bravo", "charlie", "delta"))`
#
#     "project": {
#         "lib": dict.fromkeys(("i", "ii", "iii", "iv")),
#         "i": dict.fromkeys(map("{:02d}".format, range(3))),
#         "ii": dict.fromkeys(map("{:02d}".format, range(3))),
#         "iii": dict.fromkeys(map("{:02d}".format, range(3))),
#         "iv": dict.fromkeys(map("{:02d}".format, range(3))),
#         "README": None,
#     },
#
#     "README": None,
# }
#
# is equivalent to the following directory structure:
#
# .
# |_ assets/
# |   |_ alfa.html
# |   |_ bravo.html
# |   |_ charlie.html
# |   |_ delta.html
# |_ project/
# |   |_ lib/
# |   |   |_ i.html
# |   |   |_ ii.html
# |   |   |_ iii.html
# |   |   |_ iv.html
# |   |_ i/
# |   |   |_ 00.html
# |   |   |_ 01.html
# |   |   |_ 02.html
# |   |_ ...
# |   |_ iv/
# |   |   |_ 00.html
# |   |   |_ 01.html
# |   |   |_ 02.html
# |   |_ README.html
# |_ README.html

def _export_markdown(in_fpath: str, out_fpath: str) -> None:
    """Render the Markdown file *in_fpath* to the HTML file *out_fpath*."""
    document = md_to_htmlmd(in_fpath)

    # The relinked Markdown is staged in a temporary file whose path is handed
    # to Grip. `delete=False` lets us close (and flush) the file before Grip
    # reads it; the `finally` clause guarantees it is removed even when
    # rendering or exporting fails.
    tmp_f = tempfile.NamedTemporaryFile("w+", encoding="utf-8", delete=False)
    try:
        with tmp_f:
            with marko.md_renderer.MarkdownRenderer() as renderer:
                tmp_f.write(renderer.render(document))

        os.makedirs(os.path.dirname(out_fpath), exist_ok=True)

        # Customize your Grip transformation here
        grip.export(
            title=os.path.basename(in_fpath),
            path=tmp_f.name,
            out_filename=out_fpath,
            # The token is optional: without it no password is passed to Grip.
            password=os.environ.get("GRIP_GITHUB_TOKEN"),
        )
    finally:
        os.remove(tmp_f.name)


def _write_index(dpath: str, fnames: list[str]) -> None:
    """Write *static/<dpath>/index.html* listing the entries in *fnames*."""
    index_dir = os.path.join("static", dpath)
    # A directory without any Markdown file never had an HTML file exported
    # into it, so its output directory may not exist yet.
    os.makedirs(index_dir, exist_ok=True)

    title = f"Index of {os.path.relpath(dpath).removesuffix('.')}\\"
    title = html.escape(title, quote=False)

    with open(os.path.join(index_dir, "index.html"), "w", encoding="utf-8") as f:
        printf = functools.partial(print, file=f)

        printf("<!DOCTYPE HTML>")
        printf('<html lang="pt">')
        printf("<head>")
        printf('<meta charset="utf-8">')
        printf(f"<title>{title}</title>")
        printf("</head>")
        printf("<body>")
        printf(f"<h1>{title}</h1>")
        printf("<hr>")
        printf("<ul>")
        # This sorting sorts by directories first, then by name
        for fname in sorted(fnames, key=lambda v: (v[-1] != '/', v)):
            # Percent-encode the link target and escape the visible label so
            # that names containing `&`, `<`, `#`, spaces, ... stay valid.
            href = html.escape(urllib.parse.quote(fname), quote=True)
            label = html.escape(fname, quote=False)
            printf(f'<li><a href="{href}">{label}</a></li>')
        printf("</ul>")
        printf("<hr>")
        printf("</body>")
        printf("</html>")


def parse(dpath: str, tree: Tree, *, force: bool = False) -> None:
    """Convert a Markdown-based directory to a HTML-based directory.

    This function will

    - create a *static/* folder on the current working directory to
      include all the generated HTML files and their directory listing.
      The Markdown files are _not_ affected.

    - create a *index.html* on directories describing their contents.
      The generated layout is based on Edge's directory listing, but you
      can change it in `_write_index`.

    - convert all Markdown files to HTML files, including their inner
      references to other Markdown files.

    Arguments:
    dpath -- a path to a directory in the system; it is joined onto
             *static*, so it is expected to be a relative path
    tree  -- the tree of contents of this directory
    force -- if False, this function will not generate a new HTML file for
             a MD file if there is already one on *static*.
    """
    fnames: list[str] = []
    for key, subtree in tree.items():
        fname: str
        if subtree is None:
            # `key` refers to a file path
            in_fpath = os.path.join(dpath, key)
            stem, _ = os.path.splitext(in_fpath)
            out_fpath = os.path.join("static", f"{stem}.html")
            fname = os.path.basename(out_fpath)

            if force or not os.path.isfile(out_fpath):
                _export_markdown(in_fpath, out_fpath)
        else:
            # `key` refers to a directory path
            fname = f"{key}/"

            # Call `parse` again on that directory
            parse(os.path.join(dpath, key), subtree, force=force)

        fnames.append(fname)

    # Creates the directory listing with the names obtained when iterating
    _write_index(dpath, fnames)

def main() -> None:
    """Convert every Markdown file under the current directory to HTML.

    Builds a `Tree` describing the directories and `.md` files found below
    the current working directory and hands it to `parse`, which writes the
    result to *static/*.
    """
    # Load environment variables from .env (optional)
    dotenv.load_dotenv()

    data: Tree = {}
    for root, dnames, fnames in os.walk("."):
        if root == ".":
            # Do not descend into the output directory of a previous run,
            # otherwise every run would nest another *static/static/...*.
            # Pruning `dnames` in place makes `os.walk` skip it.
            dnames[:] = [dname for dname in dnames if dname != "static"]

        # Locate the subtree for `root`. Its parents were registered while
        # visiting the directories above it, since `os.walk` is top-down.
        # The first path component is always "." and is skipped.
        child_data: Tree = data
        for key in root.split(os.path.sep)[1:]:
            child_data = child_data[key]

        for dname in dnames:
            child_data[dname] = {}

        for fname in fnames:
            if fname.endswith(".md"):
                child_data[fname] = None

    parse(".", data)

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()