Closed naitoh closed 2 months ago
The SAX2 parser expands user-defined entity references and character references, but it doesn't expand predefined entity references.
require 'rexml/document' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' require 'rexml/parsers/streamparser'
# Sample document whose <A> text mixes predefined entity references (&lt; / &gt;)
# with a character reference (&#13;), so each parser's expansion can be compared.
# NOTE(review): the heredoc body was missing from this copy; it is reconstructed
# from the SAX progress offsets (6, 9, 12, 74, 78, 79, 86) and the events printed
# in the output below — confirm against the original script.
xml = <<EOS
<root>
  <A>&lt;P&gt;&#13; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;</A>
</root>
EOS
# Catch-all listener: prints every callback it receives as
# [event_name, arg1, arg2, ...] so the events of each parser can be compared.
class Listener
  # name - Symbol name of the callback that was invoked.
  # args - whatever arguments the parser passed. The splat is required because
  #        callbacks differ in arity (e.g. start_document takes none, while
  #        start_element takes four).
  def method_missing(name, *args)
    p [name, *args]
  end
end
# Feed the same document to each REXML front end and print what each one
# reports, so the text/characters events can be compared side by side.

# DOM: print the text of every /root/A element.
puts "REXML(DOM)"
REXML::Document.new(xml).elements.each("/root/A") { |element| puts element.text }

# Pull parser: print each event as it is pulled.
puts ""
puts "REXML(Pull)"
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
  res = parser.pull
  p res
end

# Stream parser: callbacks are printed by Listener.
puts ""
puts "REXML(Stream)"
parser = REXML::Parsers::StreamParser.new(xml, Listener.new).parse

# SAX2 parser: callbacks are printed by Listener.
puts ""
puts "REXML(SAX)"
parser = REXML::Parsers::SAX2Parser.new(xml)
parser.listen(Listener.new)
parser.parse
## Before (master)
$ ruby text_unnormalized.rb REXML(DOM) Text
REXML(Pull) start_element: ["root", {}] text: ["\n  ", "\n  "] start_element: ["A", {}] text: ["&lt;P&gt;&#13; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;", "<P>\r <I> <B> Text </B>  </I>"] end_element: ["A"] text: ["\n", "\n"] end_element: ["root"] end_document: []
REXML(Stream) [:tag_start, "root", {}] [:text, "\n  "] [:tag_start, "A", {}] [:text, "<P>\r <I> <B> Text </B>  </I>"] [:tag_end, "A"] [:text, "\n"] [:tag_end, "root"]
REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, "\n  "] [:progress, 9] [:start_element, nil, "A", "A", {}] [:progress, 12] [:characters, "&lt;P&gt;\r &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;"] #<= This [:progress, 74] [:end_element, nil, "A", "A"] [:progress, 78] [:characters, "\n"] [:progress, 79] [:end_element, nil, "root", "root"] [:progress, 86] [:end_document]
## After(This PR)
$ ruby text_unnormalized.rb REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, "\n  "] [:progress, 9] [:start_element, nil, "A", "A", {}] [:progress, 12] [:characters, "<P>\r <I> <B> Text </B>  </I>"] [:progress, 74] [:end_element, nil, "A", "A"] [:progress, 78] [:characters, "\n"] [:progress, 79] [:end_element, nil, "root", "root"] [:progress, 86] [:end_document]
Thanks.
Why?
The SAX2 parser expands user-defined entity references and character references, but it doesn't expand predefined entity references.
Change
# Sample document whose <A> text mixes predefined entity references (&lt; / &gt;)
# with a character reference (&#13;), so each parser's expansion can be compared.
# NOTE(review): the heredoc body was missing from this copy; it is reconstructed
# from the SAX progress offsets (6, 9, 12, 74, 78, 79, 86) and the events printed
# in the output below — confirm against the original script.
xml = <<EOS
<root>
  <A>&lt;P&gt;&#13; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;</A>
</root>
EOS
# Catch-all listener: prints every callback it receives as
# [event_name, arg1, arg2, ...] so the events of each parser can be compared.
class Listener
  # name - Symbol name of the callback that was invoked.
  # args - whatever arguments the parser passed. The splat is required because
  #        callbacks differ in arity (e.g. start_document takes none, while
  #        start_element takes four).
  def method_missing(name, *args)
    p [name, *args]
  end
end
# Feed the same document to each REXML front end and print what each one
# reports, so the text/characters events can be compared side by side.

# DOM: print the text of every /root/A element.
puts "REXML(DOM)"
REXML::Document.new(xml).elements.each("/root/A") { |element| puts element.text }

# Pull parser: print each event as it is pulled.
puts ""
puts "REXML(Pull)"
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
  res = parser.pull
  p res
end

# Stream parser: callbacks are printed by Listener.
puts ""
puts "REXML(Stream)"
parser = REXML::Parsers::StreamParser.new(xml, Listener.new).parse

# SAX2 parser: callbacks are printed by Listener.
puts ""
puts "REXML(SAX)"
parser = REXML::Parsers::SAX2Parser.new(xml)
parser.listen(Listener.new)
parser.parse
$ ruby text_unnormalized.rb REXML(DOM) Text
REXML(Pull) start_element: ["root", {}] text: ["\n  ", "\n  "] start_element: ["A", {}] text: ["&lt;P&gt;&#13; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;", "<P>\r <I> <B> Text </B>  </I>"] end_element: ["A"] text: ["\n", "\n"] end_element: ["root"] end_document: []
REXML(Stream) [:tag_start, "root", {}] [:text, "\n  "] [:tag_start, "A", {}] [:text, "<P>\r <I> <B> Text </B>  </I>"] [:tag_end, "A"] [:text, "\n"] [:tag_end, "root"]
REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, "\n  "] [:progress, 9] [:start_element, nil, "A", "A", {}] [:progress, 12] [:characters, "&lt;P&gt;\r &lt;I&gt; &lt;B&gt; Text &lt;/B&gt;  &lt;/I&gt;"] #<= This [:progress, 74] [:end_element, nil, "A", "A"] [:progress, 78] [:characters, "\n"] [:progress, 79] [:end_element, nil, "root", "root"] [:progress, 86] [:end_document]
$ ruby text_unnormalized.rb REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, "\n  "] [:progress, 9] [:start_element, nil, "A", "A", {}] [:progress, 12] [:characters, "<P>\r <I> <B> Text </B>  </I>"] [:progress, 74] [:end_element, nil, "A", "A"] [:progress, 78] [:characters, "\n"] [:progress, 79] [:end_element, nil, "root", "root"] [:progress, 86] [:end_document]