tautologistics / node-htmlparser

Forgiving HTML/XML/RSS Parser in JS for *both* Node and Browsers
MIT License
1.15k stars 139 forks source link

1.x: HTML comment delimiters inside <script> end the tag #46

Open papandreou opened 12 years ago

papandreou commented 12 years ago
var rawHtml = '<script>document.write("<!--hello-->");</script>',
    htmlparser = require('./lib/htmlparser'),
    handler = new htmlparser.DefaultHandler(),
    parser = new htmlparser.Parser(handler);
parser.parseComplete(rawHtml);
console.warn(require('util').inspect(handler.dom, false, null));

Output:

[ { raw: 'script',
    data: 'script',
    type: 'script',
    name: 'script',
    children: 
     [ { raw: 'document.write("',
         data: 'document.write("',
         type: 'text' },
       { raw: 'hello', data: 'hello', type: 'comment' },
       { raw: '");', data: '");', type: 'text' } ] } ]

Expected output:

[ { raw: 'script',
    data: 'script',
    type: 'script',
    name: 'script',
    children: 
     [ { raw: 'document.write("<!--hello-->");',
         data: 'document.write("<!--hello-->");',
         type: 'text' } ] } ]
tautologistics commented 12 years ago

If you have a patch or pull request, I will apply it to the 1.x branch; otherwise, I am focusing my efforts on getting 2.x tested and released.