aredridel / html5

Event-driven HTML5 Parser in Javascript
http://dinhe.net/~aredridel/projects/js/html5/
MIT License
590 stars 168 forks source link

parse error #21

Closed demian85 closed 13 years ago

demian85 commented 13 years ago
var util = require('util'),
    zombie = require('zombie');

var browser = zombie.visit('http://www.google.com/search?q=twitter', {
    runScripts : false,
    debug : true,
    userAgent : "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.98 Safari/534.13"
}, function(err, browser, st) {
    if (err) throw err;
    browser.dump();
});

throws...

Zombie: GET http://www.google.com/search?q=twitter
Zombie: GET http://www.google.com/search?q=twitter => 200
Zombie: GET http://www.google.com/blank.html

/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:62
                throw(e);
    ^
TypeError: Cannot read property '27' of undefined
    at EventEmitter.consume_numeric_entity (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:174:32)
    at EventEmitter.consume_entity (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:103:16)
    at EventEmitter.entity_data_state (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:251:20)
    at EventEmitter.<anonymous> (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:59:25)
    at EventEmitter.emit (events.js:42:17)
    at EventEmitter.pump (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:45:11)
    at EventEmitter.tokenize (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/tokenizer.js:78:21)
    at EventEmitter.parse (/usr/local/lib/node/.npm/html5/0.2.14/package/lib/html5/parser.js:47:17)
    at HtmlToDom.appendHtmlToElement (/usr/local/lib/node/.npm/jsdom/0.2.0/package/lib/jsdom/browser/htmltodom.js:90:50)
    at Object.innerHTML (/usr/local/lib/node/.npm/jsdom/0.2.0/package/lib/jsdom/browser/index.js:334:27)
demian85 commented 13 years ago

Hello? Any news? Is this module in active development? I have never been able to use this module without getting errors. Every time I try something new with the zombie module, everything breaks because of this library... Is anyone trying to fix these errors?

applegrew commented 13 years ago

I am facing this issue too. In tokenizer.js, HTML5.WINDOWS1252 is undefined.

applegrew commented 13 years ago

Ok found the bug.

WINDOWS1252 seems to have been renamed to ENTITIES_WINDOWS1252 in constants.js, but tokenizer.js still uses the old name!

Quick fix: rename HTML5.WINDOWS1252 to HTML5.ENTITIES_WINDOWS1252 in tokenizer.js. :)

aredridel commented 13 years ago

Fixed in v0.2.15