Open filips123 opened 5 years ago
+1 This would be super useful
I have implemented a workaround for this case, if anyone is interested:
import os
import re
import typing
import tempfile
import functools
# python -m pip install grip
# python -m pip install marko (for parsing Markdown files)
# python -m pip install dotenv (optional, for reading GitHub tokens)
import grip
import dotenv
import marko.block
import marko.parser
import marko.md_renderer
def md_to_htmlmd(fpath: str) -> marko.block.Document:
"""Convert all references to Markdown paths to HTML paths in a Markdown document.
Suppose a Markdown file *main.md* has a link that redirects to
*some/path/file.md*. If we convert *file.md* to HTML, changing both its
contents and file extension, the reference on *main.md* would fail. Therefore,
we must change this reference to *some/path/file.html* instead.
This functions affects Markdown links (`(foo)[bar]`) and HTML links (`a` tags).
Arguments:
fpath -- the path of the Markdown file to be relinked
"""
parser = marko.parser.Parser()
with open(fpath, encoding="utf-8") as f:
document = parser.parse(f.read())
for node in document.children:
match node:
case marko.block.Paragraph():
for child_node in node.children:
match child_node:
case marko.inline.Link():
url: str = child_node.dest
text: str = child_node.children[0].children
if not url.endswith(".md"):
continue
updated_url, _ = url.rsplit(".", 1)
updated_url = f"{updated_url}.html"
child_node.dest = updated_url
case marko.inline.InlineHTML():
html: str = typing.cast(str, child_node.children)
if not (match := re.match(r'<a href="(.+\.md)">', html)):
continue
url = match.group(1)
updated_url, _ = url.rsplit(".", 1)
updated_url = f"{updated_url}.html"
child_node.children = f'<a href="{updated_url}">'
return document
# A directory tree: maps each entry name to its `Node`.
Tree: typing.TypeAlias = dict[str, "Node"]
# A tree entry: a nested `Tree` for a directory, or None for a file
# (`parse` treats a None value as a file and any other value as a directory).
Node: typing.TypeAlias = Tree | None
# The following object
#
# data: Tree = {
# "assets": {fname: None for fname in ("alfa", "bravo", "charlie", "delta")},
# # Equivalent to `dict.fromkeys(("alfa", "bravo", "charlie", "delta"))`
#
# "project": {
# "lib": dict.fromkeys(("i", "ii", "iii", "iv")),
# "i": dict.fromkeys(map("{:02d}".format, range(3))),
# "ii": dict.fromkeys(map("{:02d}".format, range(3))),
# "iii": dict.fromkeys(map("{:02d}".format, range(3))),
# "iv": dict.fromkeys(map("{:02d}".format, range(3))),
# "README": None,
# },
#
# "README": None,
# }
#
# is equivalent to the following directory structure:
#
# .
# |_ assets/
# | |_ alfa.html
# | |_ bravo.html
# | |_ charlie.html
# | |_ delta.html
# |_ project/
# | |_ lib/
# | | |_ i.html
# | | |_ ii.html
# | | |_ iii.html
# | | |_ iv.html
# | |_ i/
# | | |_ 00.html
# | | |_ 01.html
# | | |_ 02.html
# | |_ ...
# | |_ iv/
# | | |_ 00.html
# | | |_ 01.html
# | | |_ 02.html
# | |_ README.html
# |_ README.html
def parse(dpath: str, tree: Tree, *, force: bool = False):
    """Convert a Markdown-based directory to a HTML-based directory.

    This function will

    - create a *static/* folder on the current working directory to
      include all the generated HTML files and their directory listing.
      The Markdown files are _not_ affected.
    - create a *index.html* on directories describing their contents,
      including directories that contain no Markdown files.
      The generated layout is based on Edge's directory listing, but you
      can change it in `_write_index`.
    - convert all Markdown files to HTML files, including their inner
      references to other Markdown files.

    Arguments:
    dpath -- a path to a directory in the system
    tree -- the tree of contents of this directory
    force -- if False, this function will not generate a new HTML file for
             a MD file if there is already one on *static*.
    """
    fnames: list[str] = []
    for key, subtree in tree.items():
        if subtree is None:
            # `key` refers to a file path
            in_fpath = os.path.join(dpath, key)
            stem, _ = os.path.splitext(in_fpath)
            out_fpath = os.path.join("static", f"{stem}.html")
            if force or not os.path.isfile(out_fpath):
                _export_markdown(in_fpath, out_fpath)
            fnames.append(os.path.basename(out_fpath))
        else:
            # `key` refers to a directory path: convert it recursively
            parse(os.path.join(dpath, key), subtree, force=force)
            fnames.append(f"{key}/")
    _write_index(dpath, fnames)


def _export_markdown(in_fpath: str, out_fpath: str) -> None:
    """Render the Markdown file *in_fpath* to the HTML file *out_fpath*."""
    document = md_to_htmlmd(in_fpath)
    # A temporary file stores the relinked Markdown (inner Markdown
    # references changed to HTML references) so that Grip can read it.
    # `delete=False` keeps it on disk after it is closed.
    tmp_f = tempfile.NamedTemporaryFile("w+", encoding="utf-8", delete=False)
    try:
        with tmp_f:
            with marko.md_renderer.MarkdownRenderer() as renderer:
                tmp_f.write(renderer.render(document))
        os.makedirs(os.path.dirname(out_fpath), exist_ok=True)
        # Customize your Grip transformation here
        grip.export(
            title=os.path.basename(in_fpath),
            path=tmp_f.name,
            out_filename=out_fpath,
            password=os.environ["GRIP_GITHUB_TOKEN"],
        )
    finally:
        # Delete the temporary file even when rendering or exporting fails
        # (for instance when GRIP_GITHUB_TOKEN is not set).
        os.remove(tmp_f.name)


def _write_index(dpath: str, fnames: list[str]) -> None:
    """Write *static/<dpath>/index.html* listing the entries in *fnames*."""
    index_dpath = os.path.join("static", dpath)
    # The directory does not exist yet when nothing was exported into it,
    # e.g. when `dpath` contains no Markdown files.
    os.makedirs(index_dpath, exist_ok=True)
    with open(os.path.join(index_dpath, "index.html"), "w+", encoding="utf-8") as f:
        printf = functools.partial(print, file=f)
        title = f"Index of {os.path.relpath(dpath).removesuffix('.')}\\"
        printf("<!DOCTYPE HTML>")
        printf('<html lang="pt">')
        printf("<head>")
        printf('<meta charset="utf-8">')
        printf(f"<title>{title}</title>")
        printf("</head>")
        printf("<body>")
        printf(f"<h1>{title}</h1>")
        printf("<hr>")
        printf("<ul>")
        # This sorting sorts by directories first, then by name
        for fname in sorted(fnames, key=lambda v: (v[-1] != "/", v)):
            printf(f'<li><a href="{fname}">{fname}</a></li>')
        printf("</ul>")
        printf("<hr>")
        printf("</body>")
        printf("</html>")
def main() -> None:
    """Collect the Markdown files under the current directory and convert them.

    Walks the current working directory, builds a `Tree` with every
    directory and every file ending in ".md", and passes it to `parse`.
    The top-level *static* directory is skipped because `parse` writes its
    output there.
    """
    # Load environment variables from .env (optional)
    dotenv.load_dotenv()

    data: Tree = {}
    for root, dnames, fnames in os.walk("."):
        if root == ".":
            # Prune the output directory in place so that `os.walk` does not
            # descend into it; otherwise every re-run would index the
            # previously generated files into static/static/...
            dnames[:] = [dname for dname in dnames if dname != "static"]

        # `root` is "." followed by the directory names leading to the
        # current directory: skip the leading "." and descend the tree.
        # Parents are visited first, so their entries already exist.
        child_data: Tree = data
        for key in root.split(os.path.sep)[1:]:
            child_data = child_data[key]

        for dname in dnames:
            child_data[dname] = {}
        for fname in fnames:
            if fname.endswith(".md"):
                child_data[fname] = None

    parse(".", data)


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
Can you add support for serving a directory, which would allow viewing all of its files? Files should be converted to HTML automatically, but keep their original filename and extension.