Closed wnm3 closed 3 days ago
I think this works, but I'd be interested to hear if I've missed something...
# Sample markup fed to the pretty-printer below; replace the placeholder
# line with the HTML document you want to print.
html = """
<place your HTML content here>
"""
@staticmethod
def printNode(node: Node, incr: int, indent: int):
text = node.text(deep=False, separator="", strip=True)
if node.tag == "-text":
if len(text) != 0:
print(indent * " " + text)
else:
if len(node.attributes) == 0:
print(indent * " " + "<" + node.tag + ">")
else:
attrs = ""
for key in node.attributes:
value = node.attributes[key]
if isinstance(value, (bool, int, float)):
attrs += key + "=" + str(value) + " "
else:
attrs += key + '="' + value + '" '
print(indent * " " + "<" + node.tag + " " + attrs + ">")
for child in node.iter(include_text=True):
printNode(child, incr, indent + incr)
print(indent * " " + "</" + node.tag + ">")
@staticmethod
def printHTML(html: HTMLParser, incr: int):
root = html.root
if len(root.attributes) == 0:
print("<" + root.tag + ">" + root.text(deep=False, separator="", strip=True))
else:
attrs = ""
for key in root.attributes:
value = root.attributes[key]
if isinstance(value, (bool, int, float)):
attrs += key + "=" + value + " "
else:
attrs += key + '="' + value + '" '
if root.text_content:
print("<" + root.tag + " " + attrs + ">" + root.text_content)
else:
print("<" + root.tag + " " + attrs + ">")
for node in root.iter(include_text=True):
printNode(node, incr, incr)
print("</" + root.tag + ">")
# Parse the sample markup, then pretty-print it with a two-space step
# per nesting level.
htmlDoc = HTMLParser(html)
printHTML(html=htmlDoc, incr=2)
Hi, there is no built-in HTML formatter in selectolax. It only formats output in a pre-defined way. But your code should work fine.
Does selectolax provide a way to pretty-print the parsed HTML object, with indentation, as a string?