deedy5 / html2text_rs

Python library for converting HTML to Markdown or plain text
MIT License
2 stars 0 forks source link
html-to-markdown html-to-text html2markdown html2md html2text markdown python

Python >= 3.8 Downloads CI

html2text_rs

Convert HTML to Markdown, plain text, or rich text.
Python bindings to the Rust library rust-html2text.

Table of Contents

Installation

pip install -U html2text_rs

Usage

1. text_markdown()

def text_markdown(html: str, width: int = 100):
    """Convert HTML to markdown text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    """

Example:

import html2text_rs
import requests

resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_markdown = html2text_rs.text_markdown(resp.text)
print(text_markdown)

2. text_plain()

def text_plain(html: str, width: int = 100):
    """Convert HTML to plain text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    """

Example:

import html2text_rs
import requests

resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_plain = html2text_rs.text_plain(resp.text)
print(text_plain)

3. text_rich()

def text_rich(html: str, width: int = 100):
    """Convert HTML to rich text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    """

Example:

import html2text_rs
import requests

resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_rich = html2text_rs.text_rich(resp.text)
print(text_rich)