Algocircle / Cascadia.jl

A CSS Selector library in Julia
Other
120 stars 15 forks source link

Cascadia never finishes eachmatch execution #19

Closed rapus95 closed 4 years ago

rapus95 commented 4 years ago
julia> using Gumbo, Cascadia

julia> s = Selector("""div[data-pseudo-content="Unternehmen"]+div""")
Selector(Cascadia.var"#55#56"{Selector,Selector,Bool}(Selector(Cascadia.var"#25#26"{Selector,Selector}(Selector(Cascadia.var"#5#6"(Core.Box("div"))), Selector(Cascadia.var"#7#8"{Cascadia.var"#11#12"{String}}(Cascadia.var"#11#12"{String}("Unternehmen"), Core.Box("data-pseudo-content"))))), Selector(Cascadia.var"#5#6"(Core.Box("div"))), true))

julia> begin
           root = parsehtml("""<tr _ngcontent-udu-c357="" class="table-body table-smallFont ng-tns-c357-96 ng-star-inserted"><td _ngcontent-udu-c357="" class="ng-tns-c357-96 ng-star-inserted"><div _ngcontent-udu-c357="" appdbtooltip="" class="ng-tns-c357-96" style="cursor: default; opacity: 0.4;"><span _ngcontent-udu-c357="" class="k-icon k-i-user ng-tns-c357-96"></span></div></td><!----><!----><td _ngcontent-udu-c357="" class="ng-tns-c357-96 kp-link ng-star-inserted">Mr Name</td><!----><td _ngcontent-udu-c357="" class="ng-tns-c357-96">Geschäftsführer <div _ngcontent-udu-c357="" class="ng-tns-c357-96 ng-star-inserted">der <span _ngcontent-udu-c357="" style="font-style: italic;" class="ng-tns-c357-96">Proud GmbH</span></div><!----></td><!----><td _ngcontent-udu-c357="" class="ng-tns-c357-96 ng-star-inserted">58</td><!----></tr>
           """)
           eachmatch(s, root.root)
       end

This code never stops execution for me and I have no clue why. I'd consider it a bug but maybe it is just my fault.

aviks commented 4 years ago

Yeah, looks like something goes into an infinite loop

rapus95 commented 4 years ago

I could narrow it down to the following MWE:

julia> using Gumbo, Cascadia

julia> s = Selector("""div+div""");

julia> root = parsehtml("""text <div></div>""")

julia> eachmatch(s, root.root)
# never finishes

If I place the text after the `<div></div>`, everything works.

Debugging: start with `@enter eachmatch(s, root.root)`, then step `nc` -> `s` -> `n` -> `n`, and keep hitting `n` until you get `About to run: <(getfield)((HTMLElement{:div}:<div></div>`:

1|debug> n
In matchAllInto(s, n, storage) at C:\Users\Aaron\.julia\packages\Cascadia\BqwaO\src\selector.jl:108
 108  function matchAllInto(s::Selector, n::HTMLNode, storage::Array)
>109      for c in PreOrderDFS(n)
 110          if s(c); push!(storage, c); end
 111      end
 112      return storage
 113  end

About to run: <(getfield)((HTMLElement{:div}:<div></div>
, AbstractTrees.ImplicitNodeStack{Any,Int64}(Any[HTMLElemen...>
1|debug> n
In matchAllInto(s, n, storage) at C:\Users\Aaron\.julia\packages\Cascadia\BqwaO\src\selector.jl:108
 108  function matchAllInto(s::Selector, n::HTMLNode, storage::Array)
 109      for c in PreOrderDFS(n)
>110          if s(c); push!(storage, c); end
 111      end
 112      return storage
 113  end

About to run: <(Selector(Cascadia.var"#55#56"{Selector,Selector,Bool}(Selector(Cascadia.var"#5#6"(Core.Box("div"))),...>
1|debug> s
In Selector(n) at C:\Users\Aaron\.julia\packages\Cascadia\BqwaO\src\selector.jl:10
>1  1 ─ %1 = (getproperty)(s, :f)
 2  │   %2 = (%1)(n)
 3  └──      return %2

About to run: <(getproperty)(Selector(Cascadia.var"#55#56"{Selector,Selector,Bool}(Selector(Cascadia.var"#5#6"(Core....>
1|debug> nc
In Selector(n) at C:\Users\Aaron\.julia\packages\Cascadia\BqwaO\src\selector.jl:10
 1  1 ─ %1 = (getproperty)(s, :f)
>2  │   %2 = (%1)(n)
 3  └──      return %2

About to run: <(Cascadia.var"#55#56"{Selector,Selector,Bool}(Selector(Cascadia.var"#5#6"(Core.Box("div"))), Selector...>
1|debug> nc
#never finishes
rapus95 commented 4 years ago

I probably found it: https://github.com/Algocircle/Cascadia.jl/blob/master/src/selector.jl#L523-L525

In that `continue` case, `m` won't be updated before the next iteration, which would explain why the loop never terminates.