MichaelHatherly / CommonMark.jl

A CommonMark-compliant Markdown parser for Julia.
Other
84 stars 11 forks source link

Error handling multi unicode text #52

Open Rratic opened 1 year ago

Rratic commented 1 year ago

(I found these while building docs.)

p=Parser(); enable!(p, AdmonitionRule());

julia> p("""
       !!! note "Ju 的文字"
           Ju
       """)
ERROR: StringIndexError: invalid index [18], valid nearby indices [17]=>'文', [20]=>'字'
...

julia> raw="在上一篇[向量化编程与广播(1):引言](20220515.md)中我们介绍了 `meshgrid` 这个函数的由来,以及表明广播的存在使得它在大多数时候已经成为一个不 必要的函数了。在这一篇中我们将更具体地介绍广播的基本规则,从而在实际编程中可以能够更自如地使用这一概念。"; p(raw)
Error showing value of type CommonMark.Node:
ERROR: StringIndexError: invalid index [84], valid nearby indices [83]=>'经', [86]=>'成'
Stacktrace:
  [1] string_index_err(s::String, i::Int64)
    @ Base .\strings\string.jl:12
  [2] findprev(testf::CommonMark.var"#35#36", s::String, i::Int64)
    @ Base .\strings\search.jl:408
  [3] print_literal_part(r::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, lit::String, rec::Int64)
...
MichaelHatherly commented 1 year ago

@Rratic, can you provide a complete stacktrace for each of those errors, if possible, rather than truncating with `...`? That would be helpful, thanks.

Rratic commented 1 year ago

@MichaelHatherly Well, I thought you could run the code yourself.

ERROR: StringIndexError: invalid index [18], valid nearby indices [17]=>'文', [20]=>'字'
Stacktrace:
  [1] string_index_err(s::String, i::Int64)
    @ Base .\strings\string.jl:12
  [2] getindex_continued(s::String, i::Int64, u::UInt32)
    @ Base .\strings\string.jl:233
  [3] getindex
    @ .\strings\string.jl:226 [inlined]
  [4] get
    @ .\strings\basic.jl:200 [inlined]
  [5] find_next_nonspace(parser::Parser)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:185
  [6] incorporate_line(parser::Parser, ln::String)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:298
  [7] parse(parser::Parser, my_input::IOBuffer; kws::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:440
  [8] parse
    @ C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:427 [inlined]
  [9] #_#21
    @ C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:451 [inlined]
 [10] Parser
    @ C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:451 [inlined]
 [11] #_#20
    @ C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:450 [inlined]
 [12] (::Parser)(text::String)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\parsers\blocks.jl:450
 [13] top-level scope
    @ REPL[3]:1

Error showing value of type CommonMark.Node:
ERROR: StringIndexError: invalid index [118], valid nearby indices [116]=>'了', [119]=>'。'
Stacktrace:
  [1] string_index_err(s::SubString{String}, i::Int64)
    @ Base .\strings\string.jl:12
  [2] findprev(testf::CommonMark.var"#35#36", s::SubString{String}, i::Int64)
    @ Base .\strings\search.jl:408
  [3] print_literal_part(r::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, lit::SubString{String}, rec::Int64)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:190
  [4] print_literal_part(r::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, lit::String, rec::Int64)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:200
  [5] print_literal_part
    @ C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:184 [inlined]
  [6] print_literal(r::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, parts::String)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:178
  [7] write_term(#unused#::CommonMark.Text, render::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, node::CommonMark.Node, enter::Bool)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:216
  [8] write_term(writer::CommonMark.Writer{CommonMark.Term, IOContext{Base.TTY}}, ast::CommonMark.Node)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:38
  [9] show(io::IOContext{Base.TTY}, ::MIME{Symbol("text/plain")}, ast::CommonMark.Node, env::Dict{String, Any})
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:5
 [10] show(io::IOContext{Base.TTY}, ::MIME{Symbol("text/plain")}, ast::CommonMark.Node)
    @ CommonMark C:\Users\rratic\.julia\packages\CommonMark\qXeHg\src\writers\term.jl:4
 [11] (::REPL.var"#38#39"{REPL.REPLDisplay{REPL.LineEditREPL}, MIME{Symbol("text/plain")}, Base.RefValue{Any}})(io::Any)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:220
 [12] with_repl_linfo(f::Any, repl::REPL.LineEditREPL)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:462
 [13] display(d::REPL.REPLDisplay, mime::MIME{Symbol("text/plain")}, x::Any)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:213
 [14] display(d::REPL.REPLDisplay, x::Any)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:225
 [15] display(x::Any)
    @ Base.Multimedia .\multimedia.jl:328
 [16] #invokelatest#2
    @ .\essentials.jl:708 [inlined]
 [17] invokelatest
    @ .\essentials.jl:706 [inlined]
 [18] print_response(errio::IO, response::Any, show_value::Bool, have_color::Bool, specialdisplay::Union{Nothing, AbstractDisplay})
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:247
 [19] (::REPL.var"#40#41"{REPL.LineEditREPL, Pair{Any, Bool}, Bool, Bool})(io::Any)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:231
 [20] with_repl_linfo(f::Any, repl::REPL.LineEditREPL)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:462
 [21] print_response(repl::REPL.AbstractREPL, response::Any, show_value::Bool, have_color::Bool)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:229
 [22] (::REPL.var"#do_respond#61"{Bool, Bool, REPL.var"#72#82"{REPL.LineEditREPL, REPL.REPLHistoryProvider}, REPL.LineEditREPL, REPL.LineEdit.Prompt})(s::REPL.LineEdit.MIState, buf::Any, ok::Bool)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:798
 [23] #invokelatest#2
    @ .\essentials.jl:708 [inlined]
 [24] invokelatest
    @ .\essentials.jl:706 [inlined]
 [25] run_interface(terminal::REPL.Terminals.TextTerminal, m::REPL.LineEdit.ModalInterface, s::REPL.LineEdit.MIState)
    @ REPL.LineEdit C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\LineEdit.jl:2441
 [26] run_frontend(repl::REPL.LineEditREPL, backend::REPL.REPLBackendRef)
    @ REPL C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:1126
 [27] (::REPL.var"#44#49"{REPL.LineEditREPL, REPL.REPLBackendRef})()
    @ REPL .\task.jl:411
Rratic commented 1 year ago

I guess the bug is in `find_next_nonspace`. Do you have any plans to fix these? I find many of my CommonMark-based packages breaking on this.

MichaelHatherly commented 1 year ago

Feel free to dig into the code base, @Rratic; in the very immediate future I won't be able to find time to fix it.

Rratic commented 1 year ago

Note that parsing also gives incorrect results in some even more frustrating situations:

julia> html(p("x𝗑𝘅𝘹𝙭𝚡x𝐱×х⨯ⅹ"))
"<p>x𝗑 𝘅 𝘹 𝙭 𝚡 x𝐱 ×х⨯ⅹ</p>\n"

There are unexpected spaces in the output.