eXist-db / exist

eXist Native XML Database and Application Platform
https://exist-db.org
GNU Lesser General Public License v2.1
428 stars 179 forks source link

Only last matched item in the parent element is highlighted #4835

Open daliboris opened 1 year ago

daliboris commented 1 year ago

Describe the bug

If an element contains multiple matched items, then after calling the util:expand() function only the last matched item in the parent element is expanded/highlighted.

Expected behavior: I would expect a function like util:expand to highlight all matches in full-text hits.

To Reproduce

xquery version "3.1";
module namespace t="http://exist-db.org/xquery/test";
declare namespace test="http://exist-db.org/xquery/xqsuite";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace exist = "http://exist.sourceforge.net/NS/exist";

(:~
 : Fixture: five p elements, each holding the word "letter" in a different
 : letter case, spread over three div elements. The third div is nested
 : inside the second one.
 :)
declare variable $t:XML-multiple-nested := document { 
 <root>
  <div>
   <p>Letter</p>
   <p>LETTER</p>
  </div>
  <div>
   <p>letter</p>
   <p>leTTer</p>
   <div>
    <p>LeTtEr</p>
   </div>
  </div>
 </root>
};

(:~
 : Fixture: three article elements, each wrapping a div with a single p.
 : Every p holds the word "letter" in a different letter case, three levels
 : below the root.
 :)
declare variable $t:XML-multiple-levels-nested := document { 
 <root>
  <article>
   <div>
    <p>Letter</p>
   </div>
  </article>
  <article>
   <div>
    <p>LETTER</p>
   </div>
  </article>
  <article>
   <div>
    <p>letter</p>
   </div>
  </article>
 </root>
};

(:~
 : Fixture: a single p element containing two occurrences of the word
 : "letter" in different letter case.
 :)
declare variable $t:XML-two-in-one := document { 
 <root>
   <p>Letter and letter</p>
 </root>
};

(:~
 : Fixture: two sibling p elements, each containing two occurrences of the
 : word "letter" in different letter case.
 :)
declare variable $t:XML-two-in-two := document { 
 <root>
   <p>Letter and letter</p>
   <p>LETTER and leTTer</p>
 </root>
};

(:~
 : Fixture: three sibling p elements, each containing one occurrence of the
 : word "letter" in a different letter case.
 :)
declare variable $t:XML-three-in-three := document { 
 <root>
   <p>Letter</p>
   <p>LETTER</p>
   <p>letter</p>
 </root>
};

(:~
 : Fixture: three siblings with different element names (p, h1, h2), each
 : containing one occurrence of the word "letter" in a different letter case.
 :)
declare variable $t:XML-three-different-elements := document { 
 <root>
   <p>Letter</p>
   <h1>LETTER</h1>
   <h2>letter</h2>
 </root>
};

(:~
 : Fixture: three sibling p elements in which the word "letter" is split by
 : an i child element wrapping a single character.
 :)
declare variable $t:XML-three-inline-elements := document { 
 <root>
   <p>Le<i>t</i>ter</p>
   <p>L<i>E</i>TTER</p>
   <p>let<i>t</i>er</p>
 </root>
};

(:~
 : Fixture: a single p element containing three occurrences of the word
 : "letter", each split by an i child element wrapping a single character.
 :)
declare variable $t:XML-three-inline-elements-in-one := document { 
 <root>
   <p>Le<i>t</i>ter L<i>E</i>TTER let<i>t</i>er</p>
 </root>
};

(:~
 : Collection configuration stored by t:setup() as collection.xconf.
 : It declares Lucene text indexes on the p, h1 and h2 elements and marks
 : the i element as inline content.
 :)
declare variable $t:xconf :=
    <collection xmlns="http://exist-db.org/collection-config/1.0">
    <index xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <fulltext default="none" attributes="false"/>
        <lucene>
         <text qname="p" />
         <text qname="h1" />
         <text qname="h2" />
         <inline qname="i" />
        </lucene>
    </index>
</collection>;

   (: Both collections are created when these variables are evaluated.
      $t:testCol receives the fixture documents, $t:indexCol receives the
      collection.xconf stored by t:setup(). :)
   declare variable $t:testCol := xmldb:create-collection("/db", "test");
   declare variable $t:indexCol := xmldb:create-collection("/db/system/config/db", "test");

(:~
 : Test set-up: stores all fixture documents in the test collection, then
 : stores the collection configuration and reindexes /db/test.
 :)
declare
    %test:setUp
function t:setup() {
    (: Each entry pairs a resource name with the document stored under it. :)
    let $fixtures := (
        ["test-multiple-nested.xml", $t:XML-multiple-nested],
        ["test-multiple-levels-nested.xml", $t:XML-multiple-levels-nested],
        ["test-two-in-one.xml", $t:XML-two-in-one],
        ["test-two-in-two.xml", $t:XML-two-in-two],
        ["test-three-in-three.xml", $t:XML-three-in-three],
        ["test-three-different-elements.xml", $t:XML-three-different-elements],
        ["test-three-inline-elements.xml", $t:XML-three-inline-elements],
        ["test-three-inline-elements-in-one.xml", $t:XML-three-inline-elements-in-one]
    )
    return (
        for $fixture in $fixtures
        return xmldb:store($t:testCol, $fixture(1), $fixture(2)),
        (: The configuration is stored after the data, hence the explicit reindex. :)
        xmldb:store($t:indexCol, "collection.xconf", $t:xconf),
        xmldb:reindex("/db/test")
    )
};
(:~
 : Test tear-down: removes the test collection and then its configuration
 : collection.
 :)
declare
    %test:tearDown
function t:tearDown() {
    ($t:testCol, $t:indexCol) ! xmldb:remove(.)
};

(:~
 : Counts the div elements below root that have a child p matching the query.
 :
 : @param $query the full-text query string
 : @return the number of matching div elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:multiple-nested-parent-hits-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    return
        $source/root//div[ft:query(p, $query)] => count()
};

(:~
 : Counts the exist:match elements produced by util:expand() for every div
 : below root that has a child p matching the query.
 :
 : Three div elements are hits, and the nested div is also part of its
 : parent's expanded copy, so its match is counted twice:
 : 2 (first div) + 3 (second div including the nested one) + 1 (nested div)
 : = 6. This equals the number of exist:match elements in the expected XML
 : of t:multiple-nested-parent-matches-xml.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hits
 :)
declare
    %test:args("letter")
    %test:assertEquals(6)
function t:multiple-nested-parent-matches-count($query as xs:string) {
    let $doc := doc($t:testCol || "/test-multiple-nested.xml")
    let $hits := $doc/root//div[ft:query(p, $query)]
    let $result := util:expand($hits)
    return count($result//exist:match)
};

(:~
 : Expands every div below root that has a child p matching the query and
 : returns the expanded copies wrapped in a result element.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded div elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><div><p><exist:match>Letter</exist:match></p><p><exist:match>LETTER</exist:match></p></div><div><p><exist:match>letter</exist:match></p><p><exist:match>leTTer</exist:match></p><div><p><exist:match>LeTtEr</exist:match></p></div></div><div><p><exist:match>LeTtEr</exist:match></p></div></result>')
function t:multiple-nested-parent-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    let $highlighted := util:expand($source/root//div[ft:query(p, $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Counts the p elements of the nested fixture that match the query.
 :
 : @param $query the full-text query string
 : @return the number of matching p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(5)
function t:multiple-nested-hits-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    return
        $source//p[ft:query(., $query)] => count()
};

(:~
 : Counts the exist:match elements produced by util:expand() for the
 : matching p elements of the nested fixture.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hits
 :)
declare
    %test:args("letter")
    %test:assertEquals(5)
function t:multiple-nested-matches-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    let $paragraphs := $source//p[ft:query(., $query)]
    return
        util:expand($paragraphs)//exist:match => count()
};

(:~
 : Expands the matching p elements of the nested fixture and returns them
 : wrapped in a result element.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match></p><p><exist:match>LETTER</exist:match></p><p><exist:match>letter</exist:match></p><p><exist:match>leTTer</exist:match></p><p><exist:match>LeTtEr</exist:match></p></result>')
function t:multiple-nested-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    let $highlighted := util:expand($source//p[ft:query(., $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Counts the p elements of the article/div/p fixture that match the query.
 :
 : @param $query the full-text query string
 : @return the number of matching p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:multiple-levels-nested-hits-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-levels-nested.xml"))
    return
        $source//p[ft:query(., $query)] => count()
};

(:~
 : Counts the exist:match elements produced by util:expand() for the
 : matching p elements of the article/div/p fixture.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hits
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:multiple-levels-nested-matches-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-levels-nested.xml"))
    let $paragraphs := $source//p[ft:query(., $query)]
    return
        util:expand($paragraphs)//exist:match => count()
};

(:~
 : Expands the matching p elements of the article/div/p fixture and returns
 : them wrapped in a result element.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match></p><p><exist:match>LETTER</exist:match></p><p><exist:match>letter</exist:match></p></result>')
function t:multiple-levels-nested-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-levels-nested.xml"))
    let $highlighted := util:expand($source//p[ft:query(., $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Same as t:multiple-nested-matches-xml, but driven by an upper-case query
 : string.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p elements
 :)
declare
    %test:args("LETTER")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match></p><p><exist:match>LETTER</exist:match></p><p><exist:match>letter</exist:match></p><p><exist:match>leTTer</exist:match></p><p><exist:match>LeTtEr</exist:match></p></result>')
function t:multiple-nested-uppercase-query-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-multiple-nested.xml"))
    let $highlighted := util:expand($source//p[ft:query(., $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Expands the matching p element of the two-in-one fixture and returns it
 : wrapped in a result element.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p element
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match> and <exist:match>letter</exist:match></p></result>')
function t:two-in-one-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-two-in-one.xml"))
    let $highlighted := util:expand($source//p[ft:query(., $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Expands the matching p elements of the two-in-two fixture and returns
 : them wrapped in a result element.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match> and <exist:match>letter</exist:match></p><p><exist:match>LETTER</exist:match> and <exist:match>leTTer</exist:match></p></result>')
function t:two-in-two-matches-xml($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-two-in-two.xml"))
    let $highlighted := util:expand($source//p[ft:query(., $query)])
    return
        <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{ $highlighted }</result>
};

(:~
 : Counts the p elements of the three-in-three fixture that match the query.
 :
 : @param $query the full-text query string
 : @return the number of matching p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:three-in-three-hits-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-three-in-three.xml"))
    return
        $source//p[ft:query(., $query)] => count()
};

(:~
 : Counts the p, h1 and h2 elements of the three-different-elements fixture
 : that match the query.
 :
 : The fixture loaded here must be test-three-different-elements.xml.
 : test-three-in-three.xml contains only p elements, so loading it would
 : never exercise the h1 and h2 indexes.
 :
 : @param $query the full-text query string
 : @return the number of matching p, h1 and h2 elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:three-different-elements-hits-count($query as xs:string) {
    let $doc := doc($t:testCol || "/test-three-different-elements.xml")
    let $hits := $doc//(p|h1|h2)[ft:query(., $query)]
    return count($hits)
};

(:~
 : Counts the exist:match elements produced by util:expand() for the
 : matching p, h1 and h2 elements of the three-different-elements fixture.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hits
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:three-different-elements-matches-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-three-different-elements.xml"))
    let $blocks := $source//(p|h1|h2)[ft:query(., $query)]
    return
        util:expand($blocks)//exist:match => count()
};

(:~
 : Expands the matching p, h1 and h2 elements of the three-different-elements
 : fixture and returns them wrapped in a result element.
 :
 : The expected XML keeps the original element names (p, h1, h2) in document
 : order, because util:expand() only adds exist:match markers and the fixture
 : does not contain three p elements.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p, h1 and h2 elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Letter</exist:match></p><h1><exist:match>LETTER</exist:match></h1><h2><exist:match>letter</exist:match></h2></result>')
function t:three-different-elements-matches-xml($query as xs:string) {
    let $doc := doc($t:testCol || "/test-three-different-elements.xml")
    let $hits := $doc//(p|h1|h2)[ft:query(., $query)]
    let $result := util:expand($hits)
    return <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{$result}</result>
};

(:~
 : Counts the p elements of the three-inline-elements fixture that match
 : the query.
 :
 : @param $query the full-text query string
 : @return the number of matching p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:inline-elements-hits-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-three-inline-elements.xml"))
    return
        $source//p[ft:query(., $query)] => count()
};

(:~
 : Counts the exist:match elements produced by util:expand() for the
 : matching p elements of the three-inline-elements fixture.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hits
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:inline-elements-matches-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-three-inline-elements.xml"))
    let $paragraphs := $source//p[ft:query(., $query)]
    return
        util:expand($paragraphs)//exist:match => count()
};

(:~
 : Expands the matching p elements of the three-inline-elements fixture and
 : returns them wrapped in a result element.
 :
 : The expected XML reproduces the inline i elements at the positions they
 : have in the fixture; the first paragraph is Le<i>t</i>ter there.
 : NOTE(review): this assumes a match spanning an inline element is reported
 : as one exist:match wrapping the whole word - confirm against the intended
 : util:expand() behaviour.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Le<i>t</i>ter</exist:match></p><p><exist:match>L<i>E</i>TTER</exist:match></p><p><exist:match>let<i>t</i>er</exist:match></p></result>')
function t:inline-elements-matches-xml($query as xs:string) {
    let $doc := doc($t:testCol || "/test-three-inline-elements.xml")
    let $hits := $doc//p[ft:query(., $query)]
    let $result := util:expand($hits)
    return <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{$result}</result>
};

(:~
 : Counts the p elements of the three-inline-elements-in-one fixture that
 : match the query.
 :
 : The fixture holds a single p element with three occurrences of the word,
 : so the number of hits (matching elements) is 1. The three occurrences
 : are covered by t:three-inline-elements-in-one-matches-count.
 :
 : @param $query the full-text query string
 : @return the number of matching p elements
 :)
declare
    %test:args("letter")
    %test:assertEquals(1)
function t:three-inline-elements-in-one-hits-count($query as xs:string) {
    let $doc := doc($t:testCol || "/test-three-inline-elements-in-one.xml")
    let $hits := $doc//p[ft:query(., $query)]
    return count($hits)
};

(:~
 : Counts the exist:match elements produced by util:expand() for the
 : matching p element of the three-inline-elements-in-one fixture.
 :
 : @param $query the full-text query string
 : @return the number of exist:match elements in the expanded hit
 :)
declare
    %test:args("letter")
    %test:assertEquals(3)
function t:three-inline-elements-in-one-matches-count($query as xs:string) {
    let $source := doc(concat($t:testCol, "/test-three-inline-elements-in-one.xml"))
    let $paragraphs := $source//p[ft:query(., $query)]
    return
        util:expand($paragraphs)//exist:match => count()
};

(:~
 : Expands the matching p element of the three-inline-elements-in-one
 : fixture and returns it wrapped in a result element.
 :
 : The expected XML reproduces the inline i elements at the positions they
 : have in the fixture; the first word is Le<i>t</i>ter there.
 : NOTE(review): this assumes a match spanning an inline element is reported
 : as one exist:match wrapping the whole word - confirm against the intended
 : util:expand() behaviour.
 :
 : @param $query the full-text query string
 : @return a result element holding the expanded p element
 :)
declare
    %test:args("letter")
    %test:assertEquals('<result xmlns:exist="http://exist.sourceforge.net/NS/exist"><p><exist:match>Le<i>t</i>ter</exist:match> <exist:match>L<i>E</i>TTER</exist:match> <exist:match>let<i>t</i>er</exist:match></p></result>')
function t:three-inline-elements-in-one-matches-xml($query as xs:string) {
    let $doc := doc($t:testCol || "/test-three-inline-elements-in-one.xml")
    let $hits := $doc//p[ft:query(., $query)]
    let $result := util:expand($hits)
    return <result xmlns:exist="http://exist.sourceforge.net/NS/exist">{$result}</result>
};

There are multiple scenarios:

All elements containing the search text are found, but not all occurrences are marked by the util:expand function. It seems that only the item in the last nested element is marked as a match:

<div>
  <p>Letter</p>
  <p>
   <exist:match>LETTER</exist:match>
  </p>
 </div>
 <div>
  <p>letter</p>
  <p>
   <exist:match>leTTer</exist:match>
  </p>
  <div>
   <p>
    <exist:match>LeTtEr</exist:match>
   </p>
  </div>
 </div>
 <div>
  <p>
   <exist:match>LeTtEr</exist:match>
  </p>
</div>

If there are two occurrences of the search text in one element, both of them are matched:

<p><exist:match>Letter</exist:match> and <exist:match>letter</exist:match></p>

but not if there are multiple elements with matched text:

<p><exist:match>Letter</exist:match> and letter</p>
<p><exist:match>LETTER</exist:match> and <exist:match>leTTer</exist:match></p>

Context (please always complete the following information): one option is to use xst, and copy and paste the output produced by running xst info here.

Additional context

adamretter commented 1 year ago

I wonder if this is related to this @daliboris - https://github.com/eXist-db/exist/issues/4789#issuecomment-1458067100 and potentially this - https://github.com/eXist-db/exist/issues/4584

daliboris commented 1 year ago

I have added new tests for the inline elements and for deeply nested elements (3 levels). Based on the results I assume that: