robrighter / node-xml

An xml parser for node.js written in javascript.
197 stars 59 forks source link

onEndDocument called multiple times #15

Open magicmonkey opened 13 years ago

magicmonkey commented 13 years ago

When running this code (https://gist.github.com/900680) on an XML file with 76 <row ... /> elements in it (and some starting and ending tags), the onEndDocument callback gets called several times. Interestingly, it gets called more times if I reduce the size of the chunks (and therefore increase the number of chunks) being pushed through the parser, but it's not called as many times as the number of chunks.

For example, here is a run with chunkSize=100:

Started parsing
Finished parsing with 2 entries, finished 1 times
Finished parsing with 9 entries, finished 2 times
Finished parsing with 13 entries, finished 3 times
Finished parsing with 22 entries, finished 4 times
Finished parsing with 28 entries, finished 5 times
Finished parsing with 42 entries, finished 6 times
Finished parsing with 47 entries, finished 7 times
Finished parsing with 50 entries, finished 8 times
Finished parsing with 56 entries, finished 9 times
Finished parsing with 59 entries, finished 10 times
Finished parsing with 64 entries, finished 11 times
Finished parsing with 76 entries, finished 12 times
Finished document element oxip
Finished parsing with 76 entries, finished 13 times

Here is a sample of mkttmpl.xml:

<?xml version="1.0" encoding="UTF-8" ?>
<!--OXi oxirepserver 8.0 getLevelData-->
<oxip version="8.0" time="2011-04-03 19:44:29">
    <auth token=""/>
    <response code="001" desc="success" debug="" name="getLevelData" sessionId="-1" userId="-1">
        <level msgId="2381775" name="MKTTMPL" format="XML" rowCount="76">
            <row mkt_tmp_id="115" mkt_grp="H1" desc="Next 3 Horse Races" ob_level="" ob_sort="" template="mktgrp4" subtemplate="HR-next_races" acceptable_tmp="" override_mkt_tmp="Y" displayed="Y" disporder="0" disp_rows="0" num_evs="3" num_mkts="-1" num_ocs="3" flags="-SPL,-APT" num_settled_evs="0" outrights="A" name="Next 3 Horse Races" switch_by_ev="N" class_flags="" type_flags=""/>
            <row mkt_tmp_id="116" mkt_grp="H2" desc="Racing Today" ob_level="" ob_sort="" template="mktgrp1" subtemplate="HR-grouped_meetings" acceptable_tmp="" override_mkt_tmp="Y" displayed="Y" disporder="0" disp_rows="0" num_evs="-1" num_mkts="-1" num_ocs="-1" flags="" num_settled_evs="-1" outrights="A" name="Racing Today - HR/G" switch_by_ev="N" class_flags="" type_flags=""/>
            <row mkt_tmp_id="117" mkt_grp="H3" desc="Tote" ob_level="" ob_sort="" template="mktgrp5" subtemplate="HR-class_tote" acceptable_tmp="" override_mkt_tmp="Y" displayed="Y" disporder="0" disp_rows="0" num_evs="-1" num_mkts="-1" num_ocs="-1" flags="" num_settled_evs="-1" outrights="A" name="Tote - Multi" switch_by_ev="N" class_flags="" type_flags=""/>
            <row mkt_tmp_id="118" mkt_grp="H4" desc="Racing Tomorrow" ob_level="" ob_sort="" template="mktgrp2" subtemplate="HR-grouped_meetings" acceptable_tmp="" override_mkt_tmp="Y" displayed="Y" disporder="0" disp_rows="0" num_evs="-1" num_mkts="-1" num_ocs="-1" flags="" num_settled_evs="-1" outrights="A" name="Racing Tomorrow - HR/G" switch_by_ev="N" class_flags="" type_flags=""/>

        </level>
    </response>
</oxip>

In my file, there are 76 <row> tags, all between a single <level> tag.

psema4 commented 13 years ago

This happens with node-xml's example.js as well:

node v0.5.0-pre & node-xml v1.0.0 (via npm install)

// ...
cb.onStartDocument(function() {
    sys.puts("DOC-START");
});
cb.onEndDocument(function() {
    sys.puts("DOC-END");
});
// ...
//example read from chunks
parser.parseString("<html><body>");
parser.parseString("<!-- This is the start");
parser.parseString(" and the end of a comment -->");
parser.parseString("and lots");
parser.parseString("and lots of text&am");
parser.parseString("p;some more.");
parser.parseString("<![CD");
parser.parseString("ATA[ this is");
parser.parseString(" cdata ]]>");
parser.parseString("</body");
parser.parseString("></html>");

Outputs:

DOC-START
=> Started: html uri=null (Attributes: [] )
=> Started: body uri=null (Attributes: [] )
DOC-END
<COMMENT> This is the start and the end of a comment </COMMENT>
DOC-END
DOC-END
DOC-END
<CDATA> this is cdata </CDATA>
DOC-END
<= End: body uri=null

<= End: html uri=null

When parsing an entire file at once:

//example read from file
parser.parseFile("sample.xml");

onEndDocument() isn't called at all.