servo / html5ever

High-performance browser-grade HTML5 parser
Other
2.1k stars 214 forks source link

thread 'main' panicked at 'assertion failed: c.is_some()' #351

Open d33tah opened 5 years ago

d33tah commented 5 years ago

src/main.rs:

use std::fs;
use std::io::Cursor;

use std::default::Default;

extern crate html5ever;
use html5ever::parse_document;
use html5ever::driver::ParseOpts;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::rcdom::RcDom;

/// Minimal reproduction: feeds the bytes of the file `failing` to the
/// html5ever document parser and discards whatever comes back.
fn main() {
    // Load the whole input up front; abort if the file cannot be read.
    let raw = fs::read("failing").expect("Unable to read file");
    let mut reader = Cursor::new(raw);

    // Only `drop_doctype` and `scripting_enabled` are set explicitly;
    // every other option keeps its default value.
    let tree_builder = TreeBuilderOpts {
        drop_doctype: true,
        scripting_enabled: false,
        ..Default::default()
    };
    let opts = ParseOpts {
        tree_builder,
        ..Default::default()
    };

    // The outcome of parsing (success or error) is intentionally ignored.
    let parser = parse_document(RcDom::default(), opts).from_utf8();
    let _ = parser.read_from(&mut reader);
}

Cargo.toml

[package]
name = "rust-warc-streaming-parser"
version = "0.1.0"
authors = ["Jacek Wielemborek <d33tah@gmail.com>"]

[dependencies]
html5ever = "*"
thread 'main' panicked at 'assertion failed: c.is_some()', /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:554:9
stack backtrace:
   0: std::sys::unix::backtrace::tracing::imp::unwind_backtrace
             at libstd/sys/unix/backtrace/tracing/gcc_s.rs:49
   1: std::sys_common::backtrace::print
             at libstd/sys_common/backtrace.rs:71
             at libstd/sys_common/backtrace.rs:59
   2: std::panicking::default_hook::{{closure}}
             at libstd/panicking.rs:211
   3: std::panicking::default_hook
             at libstd/panicking.rs:227
   4: std::panicking::rust_panic_with_hook
             at libstd/panicking.rs:475
   5: std::panicking::begin_panic
             at /checkout/src/libstd/panicking.rs:409
   6: <html5ever::tokenizer::Tokenizer<Sink>>::discard_char
             at ./<panic macros>:3
   7: <html5ever::tokenizer::Tokenizer<Sink>>::step
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:569
   8: <html5ever::tokenizer::Tokenizer<Sink>>::run
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:361
   9: <html5ever::tokenizer::Tokenizer<Sink>>::feed
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:219
  10: <html5ever::driver::Parser<Sink> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::process
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/driver.rs:88
  11: <tendril::stream::Utf8LossyDecoder<Sink, A> as tendril::stream::TendrilSink<tendril::fmt::Bytes, A>>::process
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/tendril-0.4.0/src/stream.rs:179
  12: tendril::stream::TendrilSink::read_from
             at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/tendril-0.4.0/src/stream.rs:79
  13: rust_warc_streaming_parser::main
             at src/main.rs:29
  14: std::rt::lang_start::{{closure}}
             at /checkout/src/libstd/rt.rs:74
  15: std::panicking::try::do_call
             at libstd/rt.rs:59
             at libstd/panicking.rs:310
  16: __rust_maybe_catch_panic
             at libpanic_unwind/lib.rs:106
  17: std::rt::lang_start_internal
             at libstd/panicking.rs:289
             at libstd/panic.rs:392
             at libstd/rt.rs:58
  18: std::rt::lang_start
             at /checkout/src/libstd/rt.rs:74
  19: main
  20: __libc_start_main
  21: _start

And here's the failing file, compressed with zip: failing.zip

Eijebong commented 5 years ago

Pretty sure this is #305