aaronsw / html2text

Convert HTML to Markdown-formatted text.
http://www.aaronsw.com/2002/html2text/
GNU General Public License v3.0
2.57k stars 410 forks source link

Where does the asterisk * come from? #123

Open QGB opened 3 years ago

QGB commented 3 years ago
<strong data-reactid=".0.7:$order-1133167010696432626.$1133167010696432626.0.1:1:0.$0.$4.0.0.2.0">
  <span data-reactid=".0.7:$order-1133167010696432626.$1133167010696432626.0.1:1:0.$0.$4.0.0.2.0.0">¥</span>
  <span data-reactid=".0.7:$order-1133167010696432626.$1133167010696432626.0.1:1:0.$0.$4.0.0.2.0.1">0.00</span>
</strong>

html2text(_) Out[53]: '** ¥ 0.00 **\n\n'

  <string>(1)<module>()

  c:\qgb\babun\cygwin\bin\qgb\t.py(425)html2text()
    423         h.ignore_images=ignore_images
    424         h.ignore_links=ignore_links
--> 425         return h.handle(html)
    426
    427 def html_prettify(html, formatter="html5",p=py.No('auto')):

  c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(129)handle()
    127
    128     def handle(self, data):
--> 129         self.feed(data)
    130         self.feed("")
    131         markdown = self.optwrap(self.close())

  c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(126)feed()
    124     def feed(self, data):
    125         data = data.replace("</' + 'script>", "</ignore>")
--> 126         super().feed(data)
    127
    128     def handle(self, data):

  c:\qgb\anaconda3\lib\html\parser.py(111)feed()
    109         """
    110         self.rawdata = self.rawdata + data
--> 111         self.goahead(0)
    112
    113     def close(self):

  c:\qgb\anaconda3\lib\html\parser.py(171)goahead()
    169             if startswith('<', i):
    170                 if starttagopen.match(rawdata, i): # < + letter
--> 171                     k = self.parse_starttag(i)
    172                 elif startswith("</", i):
    173                     k = self.parse_endtag(i)

  c:\qgb\anaconda3\lib\html\parser.py(345)parse_starttag()
    343             self.handle_startendtag(tag, attrs)
    344         else:
--> 345             self.handle_starttag(tag, attrs)
    346             if tag in self.CDATA_CONTENT_ELEMENTS:
    347                 self.set_cdata_mode(tag)

  c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(178)handle_starttag()
    176
    177     def handle_starttag(self, tag, attrs):
--> 178         self.handle_tag(tag, attrs, start=True)
    179
    180     def handle_endtag(self, tag):

  c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(399)handle_tag()
    397                 strong = self.strong_mark
    398
--> 399             self.o(strong)
    400             if start:
    401                 self.stressed = True

  c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(783)o()
    781
    782             self.p_p = 0
--> 783             self.out(data)
    784             self.outcount += 1
    785

> c:\qgb\anaconda3\lib\site-packages\html2text\__init__.py(138)outtextf()
    136
    137     def outtextf(self, s):
2-> 138         self.outtextlist.append(s)
    139         if s:
    140             self.lastWasNL = s[-1] == "\n"

ipdb>                                                                                                                                                               
QGB commented 3 years ago

The <strong> tag returns `**`.