Nested Ordered list not working correctly

Hi @mstevens72, I've added the following functionality to my branch of this project. Replacing the "handle_li" function with the following should yield the desired behavior. Be aware that this relies on every "li" element having an "id" attribute that is unique within the HTML you're parsing.

Good luck!

    def handle_li(self):
        """Start a new list-item paragraph for the ``<li>`` being parsed.

        The list style and nesting depth are taken from the stack in
        ``self.tags["list"]``. When the item is the first ``<li>`` of an
        ``<ol>``, the paragraph's numbering is restarted at 1 so that separate
        or nested ordered lists do not continue the previous list's count.

        Detecting "first item" relies on the ``<li>`` carrying an ``id``
        attribute that appears in the raw start tag text held by the parser.
        """

        def _is_first_ol_element(starttag_text: str) -> bool:
            """Tell whether the start tag belongs to the first item of an ``<ol>``.

            Args:
                starttag_text (str): raw start tag text as obtained by the parser.

            Returns:
                bool: True if the ``id`` of the first ``<li>`` of any ``<ol>``
                in ``self.soup`` occurs in ``starttag_text``, False otherwise.
            """
            if not starttag_text:
                return False

            for ol in self.soup.find_all("ol"):
                # Look for the first direct <li> child instead of contents[0]:
                # the first content node may be a whitespace text node.
                first_li = ol.find("li", recursive=False)
                if first_li is None:
                    continue
                li_id = first_li.get("id")
                # An item without an id cannot be matched; keep checking the
                # remaining lists rather than giving up on the whole search.
                # NOTE(review): substring match, so ids should not be
                # substrings of one another.
                if li_id and li_id in starttag_text:
                    return True
            return False

        def _restart_numbering(paragraph: Paragraph) -> Paragraph:
            """Reset list numbering of a given paragraph so it starts at 1.

            Implementation from:
            https://github.com/python-openxml/python-docx/pull/582#issuecomment-1717139576

            Args:
                paragraph (Paragraph): paragraph object

            Returns:
                Paragraph: the same paragraph object with reset list numbering
            """
            numbering = paragraph.part.document.part.numbering_part.element

            # Abstract numbering definition referenced by the paragraph's style
            style_num_id = (
                paragraph.style.element.get_or_add_pPr().get_or_add_numPr().numId.val
            )
            abstract_num_id = numbering.num_having_numId(style_num_id).abstractNumId.val

            # New numbering instance on the same abstract definition, with a
            # start override of 1 on level 0
            num = paragraph.part.numbering_part.element.add_num(abstract_num_id)
            num.add_lvlOverride(ilvl=0).add_startOverride(1)

            # Point the paragraph itself at the new numbering instance
            num_pr = paragraph._p.get_or_add_pPr().get_or_add_numPr()
            num_pr.get_or_add_ilvl().val = 0
            num_pr.get_or_add_numId().val = int(num.numId)
            return paragraph

        # check list stack to determine style and depth
        list_depth = len(self.tags["list"])

        # list type assignment: unordered if no enclosing list tag
        list_type = self.tags["list"][-1] if list_depth else "ul"

        if list_type == "ol":
            list_style = styles[f"LIST_N{list_depth}"]
        else:
            list_style = styles[f"LIST_B{list_depth}"]

        self.paragraph = self.doc.add_paragraph(style=list_style)

        # The helper reads self.soup through the closure, so only the start
        # tag text is passed (an extra ``soup=`` keyword raised TypeError).
        if list_type == "ol" and _is_first_ol_element(
            self._HTMLParser__starttag_text
        ):
            self.paragraph = _restart_numbering(paragraph=self.paragraph)

        # Indentation: default = no indent
        if list_depth > 1:
            self.paragraph.paragraph_format.left_indent = Inches(
                min(list_depth * LIST_INDENT, MAX_INDENT)
            )
        self.paragraph.paragraph_format.line_spacing = 1

Relies on imports:


from docx.table import Table, _Cell, _Row
from docx.document import Document as DocumentClass
from docx.text.paragraph import Paragraph

pqzx / html2docx

Nested Ordered list not working correctly #40