use std::fs;
use std::io::Cursor;
use std::default::Default;
extern crate html5ever;
use html5ever::parse_document;
use html5ever::driver::ParseOpts;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::rcdom::RcDom;
/// Loads the file at the relative path `failing` and runs its bytes through
/// html5ever's document parser, building an `RcDom` tree.
///
/// The parse outcome (the tree or an I/O error) is deliberately thrown away;
/// the program only exists to drive the parser over the input.
///
/// # Panics
///
/// Panics with "Unable to read file" if `failing` cannot be read.
fn main() {
    let bytes = fs::read("failing").expect("Unable to read file");
    // `read_from` wants a reader, so wrap the in-memory bytes in a cursor.
    let mut reader = Cursor::new(bytes);

    // Only two tree-builder knobs differ from the defaults.
    let tree_builder = TreeBuilderOpts {
        drop_doctype: true,
        scripting_enabled: false,
        ..Default::default()
    };
    let opts = ParseOpts {
        tree_builder,
        ..Default::default()
    };

    // Input is treated as UTF-8; the result is intentionally ignored.
    let parser = parse_document(RcDom::default(), opts).from_utf8();
    let _ = parser.read_from(&mut reader);
}
Cargo.toml
# Manifest for the reproducer binary.
[package]
name = "rust-warc-streaming-parser"
version = "0.1.0"
authors = ["Jacek Wielemborek <d33tah@gmail.com>"]

[dependencies]
# Pinned to the exact release that appears in the panic backtrace
# (html5ever-0.22.3). A wildcard ("*") lets Cargo pick any release, so the
# crash may not reproduce; NOTE(review): newer html5ever releases appear to
# have moved `rcdom` out of the crate, which would also break the build.
html5ever = "=0.22.3"
thread 'main' panicked at 'assertion failed: c.is_some()', /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:554:9
stack backtrace:
0: std::sys::unix::backtrace::tracing::imp::unwind_backtrace
at libstd/sys/unix/backtrace/tracing/gcc_s.rs:49
1: std::sys_common::backtrace::print
at libstd/sys_common/backtrace.rs:71
at libstd/sys_common/backtrace.rs:59
2: std::panicking::default_hook::{{closure}}
at libstd/panicking.rs:211
3: std::panicking::default_hook
at libstd/panicking.rs:227
4: std::panicking::rust_panic_with_hook
at libstd/panicking.rs:475
5: std::panicking::begin_panic
at /checkout/src/libstd/panicking.rs:409
6: <html5ever::tokenizer::Tokenizer<Sink>>::discard_char
at ./<panic macros>:3
7: <html5ever::tokenizer::Tokenizer<Sink>>::step
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:569
8: <html5ever::tokenizer::Tokenizer<Sink>>::run
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:361
9: <html5ever::tokenizer::Tokenizer<Sink>>::feed
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/tokenizer/mod.rs:219
10: <html5ever::driver::Parser<Sink> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::process
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/html5ever-0.22.3/src/driver.rs:88
11: <tendril::stream::Utf8LossyDecoder<Sink, A> as tendril::stream::TendrilSink<tendril::fmt::Bytes, A>>::process
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/tendril-0.4.0/src/stream.rs:179
12: tendril::stream::TendrilSink::read_from
at /home/d33tah/.cargo/registry/src/github.com-1ecc6299db9ec823/tendril-0.4.0/src/stream.rs:79
13: rust_warc_streaming_parser::main
at src/main.rs:29
14: std::rt::lang_start::{{closure}}
at /checkout/src/libstd/rt.rs:74
15: std::panicking::try::do_call
at libstd/rt.rs:59
at libstd/panicking.rs:310
16: __rust_maybe_catch_panic
at libpanic_unwind/lib.rs:106
17: std::rt::lang_start_internal
at libstd/panicking.rs:289
at libstd/panic.rs:392
at libstd/rt.rs:58
18: std::rt::lang_start
at /checkout/src/libstd/rt.rs:74
19: main
20: __libc_start_main
21: _start
And here's the failing file, compressed with zip: failing.zip
src/main.rs:
Cargo.toml:
And here's the `failing` file, compressed with `zip`: failing.zip