Closed stanhu closed 3 years ago
Hmm, this crashes Ruby 3.0 indeed.
Hmm, this crashes Ruby 3.0 indeed.
I wonder if this is related to Ractors: https://github.com/ruby/ruby/blob/ruby_3_0/gc.c#L10166-L10171
This is crashing in libpthread
I wonder as well: I believe marking is done recursively, starting with root objects. These objects here are modules. Perhaps we shouldn't explicitly mark the submodules. Could we try to only mark mHamlit
and see if that helps?
Could we try to only mark mHamlit and see if that helps?
Good idea. Let me revise.
I think this still crashes in Ruby 3.0, though.
made objects by rb_define_module_under()
are pinned. Don't worry.
I'm still seeing this failure in my reproduction script:
-- Control frame information -----------------------------------------------
c:0005 p:---- s:0020 e:000019 CFUNC :build_class
c:0004 p:0045 s:0013 e:000012 BLOCK test3.rb:9 [FINISH]
c:0003 p:---- s:0010 e:000009 CFUNC :times
c:0002 p:0017 s:0006 e:000005 EVAL test3.rb:5 [FINISH]
c:0001 p:0000 s:0003 E:001ae0 (none) [FINISH]
-- Ruby level backtrace information ----------------------------------------
test3.rb:5:in `<main>'
test3.rb:5:in `times'
test3.rb:9:in `block in <main>'
test3.rb:9:in `build_class'
-- Machine register context ------------------------------------------------
rax: 0x0000000000000000 rbx: 0x00007f8442863d50 rcx: 0x000000010b012ff0
rdx: 0x0000000000000000 rdi: 0x00007f84620b1fb0 rsi: 0x00000000000126db
rbp: 0x00007ffee4f5a680 rsp: 0x00007ffee4f5a630 r8: 0x0000000000000000
r9: 0x00007f8442863bd0 r10: 0x00007f8461cfea90 r11: 0x000000010af1f180
r12: 0x0000000000000000 r13: 0x00000000000126db r14: 0x00007f84620b1fb0
r15: 0x00007f84620b1fb0 rip: 0x000000010af217e7 rfl: 0x0000000000010202
-- C level backtrace information -------------------------------------------
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_vm_bugreport+0x6cf) [0x10af52b8f]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_bug_for_fatal_signal+0x1d6) [0x10ad6b956]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(sigsegv+0x5b) [0x10aeaa72b]
/usr/lib/system/libsystem_platform.dylib(_sigtramp+0x1d) [0x7fff6cd2b5fd]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_const_search_from+0x47) [0x10af217e7]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_const_get_0+0x4a) [0x10af1f07a]
/Users/stanhu/.rbenv/versions/3.0.0/lib/ruby/gems/3.0.0/gems/hamlit-2.14.1/lib/hamlit/hamlit.bundle(hamlit_build_class+0x315) [0x10c6ded85]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_call_cfunc_with_frame+0x14f) [0x10af48ebf]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_sendish+0x52c) [0x10af40d9c]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_exec_core+0x3de0) [0x10af26d10]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_vm_exec+0xb39) [0x10af3b889]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(invoke_block_from_c_bh+0x73c) [0x10af4cdbc]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_yield_1+0x77) [0x10af33577]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(int_dotimes+0xc8) [0x10ae08098]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_call_cfunc_with_frame+0x14f) [0x10af48ebf]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_sendish+0x52c) [0x10af40d9c]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(vm_exec_core+0x3d7e) [0x10af26cae]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_vm_exec+0xb39) [0x10af3b889]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(rb_ec_exec_node+0x127) [0x10ad76bb7]
/Users/stanhu/.rbenv/versions/3.0.0/lib/libruby.3.0.dylib(ruby_run_node+0x57) [0x10ad76a37]
/Users/stanhu/.rbenv/versions/3.0.0/bin/ruby(main+0x5d) [0x10aca7f0d]
-- Other runtime information -----------------------------------------------
Hmm, are modules defined via rb_define_module
pinned as well? I also see a call to rb_gc_register_mark_object
as well. Is there some other issue here?
Running the script a few times on Linux yields this:
-- Machine register context ------------------------------------------------
RIP: 0x00007f04320b7244 RBP: 0x00007ffdc5986f50 RSP: 0x00007ffdc5986ef8
RAX: 0x0000000000000000 RBX: 0x0000556a57080fe0 RCX: 0x00007f04320d8660
RDX: 0x0000000000000000 RDI: 0x0000556a57080fe0 RSI: 0x00000000000117cb
R8: 0x0000556a56bc75d0 R9: 0x0000556a56bc7570 R10: 0x0000000000000025
R11: 0x00007f04320b8ee0 R12: 0x00000000000117cb R13: 0x0000556a57080fe0
R14: 0x0000000000000000 R15: 0x00007ffdc5986f18 EFL: 0x0000000000010246
-- C level backtrace information -------------------------------------------
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_vm_bugreport+0x79f) [0x7f04320e38bf] vm_dump.c:755
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_bug_for_fatal_signal+0xdd) [0x7f0431f03f2d] error.c:660
/opt/gitlab/embedded/lib/libruby.so.2.7(sigsegv+0x52) [0x7f04320477d2] signal.c:946
/lib/x86_64-linux-gnu/libc.so.6(0x7f0431a87fd0) [0x7f0431a87fd0]
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_const_lookup+0x4) [0x7f04320b7244] variable.c:3354
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_const_get+0x35) [0x7f04320b8f15] variable.c:2339
/opt/gitlab/embedded/lib/ruby/gems/2.7.0/gems/hamlit-2.11.0/lib/hamlit/hamlit.so(str_space+0x16) [0x7f042c426e39] hamlit.c:16
/opt/gitlab/embedded/lib/ruby/gems/2.7.0/gems/hamlit-2.11.0/lib/hamlit/hamlit.so(hamlit_build_class) hamlit.c:158
/opt/gitlab/embedded/lib/ruby/gems/2.7.0/gems/hamlit-2.11.0/lib/hamlit/hamlit.so(rb_hamlit_build_class+0x5b) [0x7f042c4272ab] hamlit.c:478
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_call_cfunc+0x11f) [0x7f04320c02ff] vm_insnhelper.c:2514
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_call_method+0xc5) [0x7f04320dc565] vm_insnhelper.c:3053
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_exec_core+0x154) [0x7f04320cb7d4] vm_insnhelper.c:4023
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_vm_exec+0x98) [0x7f04320d1c68] vm.c:1920
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_yield_1+0x19a) [0x7f04320d4a6a] vm.c:1116
/opt/gitlab/embedded/lib/libruby.so.2.7(int_dotimes+0x48) [0x7f0431fa6b98] numeric.c:5201
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_call_cfunc+0x11f) [0x7f04320c02ff] vm_insnhelper.c:2514
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_call_method+0xc5) [0x7f04320dc565] vm_insnhelper.c:3053
/opt/gitlab/embedded/lib/libruby.so.2.7(vm_exec_core+0x209) [0x7f04320cb889] vm_insnhelper.c:4023
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_vm_exec+0x7c7) [0x7f04320d2397] vm.c:1929
/opt/gitlab/embedded/lib/libruby.so.2.7(rb_ec_exec_node+0xa0) [0x7f0431f0aff0] eval.c:278
Is the actual issue here the rb_str_new_cstr
calls are not pinned?
I successfully reproduced the issue with your script, but after I rebuilt the gem, it never happened again... It'd be really helpful if we can somehow increase the possibility of reproduction, because I cannot verify any fix on my end anymore.
But this output https://github.com/k0kubun/hamlit/pull/171#issuecomment-763496309 is really helpful. It shows inlined frames and line numbers of Hamlit unlike https://github.com/k0kubun/hamlit/pull/171#issuecomment-763483616. It's unfortunate that this is not Ruby 3.0's but Ruby 2.7's though.
According to the backtrace, the problem seems to happen in rb_const_get(mAttributeBuilder, id_space)
inside str_space()
. Assuming rb_intern()
's result doesn't move (does it?), reference to mAttributeBuilder
is the most suspicious, even while ko1 said objects made by rb_define_module_under
are pinned. It might be specific to Ruby 3.0 though (I'm not sure).
This is very hard to fix on my end because your script doesn't reproduce the issue on my environment (Linux x86_64) anymore. I've reproduced only once so far.
ko1 advised me to check if mAttributeBuilder
is moved or not to confirm my suspicion. Could you check if this code shows 1
before it SEGVs on your environment?
diff --git a/ext/hamlit/hamlit.c b/ext/hamlit/hamlit.c
index 11efb33..d883678 100644
--- a/ext/hamlit/hamlit.c
+++ b/ext/hamlit/hamlit.c
@@ -9,12 +9,12 @@ static ID id_flatten, id_keys, id_parse, id_prepend, id_tr, id_uniq_bang;
static ID id_aria, id_data, id_equal, id_hyphen, id_space, id_underscore;
static ID id_boolean_attributes, id_xhtml;
-static VALUE str_aria() { return rb_const_get(mAttributeBuilder, id_aria); }
-static VALUE str_data() { return rb_const_get(mAttributeBuilder, id_data); }
-static VALUE str_equal() { return rb_const_get(mAttributeBuilder, id_equal); }
-static VALUE str_hyphen() { return rb_const_get(mAttributeBuilder, id_hyphen); }
-static VALUE str_space() { return rb_const_get(mAttributeBuilder, id_space); }
-static VALUE str_underscore() { return rb_const_get(mAttributeBuilder, id_underscore); }
+static VALUE str_aria() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_aria); }
+static VALUE str_data() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_data); }
+static VALUE str_equal() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_equal); }
+static VALUE str_hyphen() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_hyphen); }
+static VALUE str_space() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_space); }
+static VALUE str_underscore() { fprintf(stderr, "%d", RB_TYPE_P(mAttributeBuilder, T_MOVED)); return rb_const_get(mAttributeBuilder, id_underscore); }
static void
delete_falsey_values(VALUE values)
It returns 0
:
$ ruby fail.rb
0fail.rb:9: [BUG] Segmentation fault at 0x0000000000000018
ruby 2.7.2p137 (2020-10-01 revision 5445e04352) [x86_64-linux]
-- Control frame information -----------------------------------------------
c:0005 p:---- s:0020 e:000019 CFUNC :build_class
c:0004 p:0045 s:0013 e:000012 BLOCK fail.rb:9 [FINISH]
c:0003 p:---- s:0010 e:000009 CFUNC :times
c:0002 p:0017 s:0006 e:000005 EVAL fail.rb:5 [FINISH]
c:0001 p:0000 s:0003 E:001f00 (none) [FINISH]
-- Ruby level backtrace information ----------------------------------------
fail.rb:5:in `<main>'
fail.rb:5:in `times'
fail.rb:9:in `block in <main>'
fail.rb:9:in `build_class'
-- Machine register context ------------------------------------------------
RIP: 0x00007f88a9640088 RBP: 0x000000000001237b RSP: 0x00007fff22729ca0
RAX: 0x0000000000000000 RBX: 0x000055a6bdb47f08 RCX: 0x0000000000000000
RDX: 0x00007f88a93bd8b0 RDI: 0x000055a6bdb47f08 RSI: 0x000000000001237b
R8: 0x0000000000000001 R9: 0x0000000000000000 R10: 0x0000000000000026
R11: 0x00007f88a9641de0 R12: 0x000055a6bdb47f08 R13: 0x00007fff22729cc0
R14: 0x0000000000000000 R15: 0x000055a6bdad6aa0 EFL: 0x0000000000010246
-- C level backtrace information -------------------------------------------
/home/stanhu/.rbenv/versions/2.7.2/lib/libruby.so.2.7(rb_vm_bugreport+0x7ce) [0x7f88a966d28e] vm_dump.c:755
/home/stanhu/.rbenv/versions/2.7.2/lib/libruby.so.2.7(rb_bug_for_fatal_signal+0xe7) [0x7f88a948c6e7] error.c:660
/home/stanhu/.rbenv/versions/2.7.2/lib/libruby.so.2.7(sigsegv+0x4b) [0x7f88a95d06eb] signal.c:946
/lib/x86_64-linux-gnu/libc.so.6(0x7f88a900efd0) [0x7f88a900efd0]
/home/stanhu/.rbenv/versions/2.7.2/lib/libruby.so.2.7(rb_const_lookup+0x18) [0x7f88a9640088] variable.c:3354
/home/stanhu/.rbenv/versions/2.7.2/lib/libruby.so.2.7(rb_const_get+0x41) [0x7f88a9641e21] variable.c:2339
/home/stanhu/.rbenv/versions/2.7.2/lib/ruby/gems/2.7.0/gems/hamlit-2.14.1/lib/hamlit/hamlit.so(str_space+0x52) [0x7f889f19c6b6] hamlit.c:16
/home/stanhu/.rbenv/versions/2.7.2/lib/ruby/gems/2.7.0/gems/hamlit-2.14.1/lib/hamlit/hamlit.so(hamlit_build_class) hamlit.c:157
/home/stanhu/.rbenv/versions/2.7.2/lib/ruby/gems/2.7.0/gems/hamlit-2.14.1/lib/hamlit/hamlit.so(rb_hamlit_build_class+0x6e) [0x7f889f19cb7e] hamlit.c:4
77
I also checked mObjectRef
, and it returns 0 as well.
It seems that I can easily reproduce this with Ruby 2.7.2. I'll take a look at the details.
I've failed to find anything moved. By the way, is it possible to reproduce this issue without require 'google/apis'
? I thought potentially the class reference tree could be broken due to google-api-client and any rb_const_get
could be failing due to the gem whether it's Hamlit or not.
It's still not clear why it's happening, but I found one workaround. I'm implementing it.
Thanks! I'll close this pull request then.
I was able to reproduce this without the require 'google/apis'
.
Could you check if https://github.com/k0kubun/hamlit/pull/172 works? I'll release this tomorrow if it works out.
Let me give it a shot.
FYI, I haven't yet been able to reproduce the failure with this patch. Note I first tried marking the result of the rb_obj_freeze(str)
, but that didn't seem to work.
diff --git a/ext/hamlit/hamlit.c b/ext/hamlit/hamlit.c
index 11efb33..4ef812c 100644
--- a/ext/hamlit/hamlit.c
+++ b/ext/hamlit/hamlit.c
@@ -510,6 +510,16 @@ rb_hamlit_build(int argc, VALUE *argv, RB_UNUSED_VAR(VALUE self))
return hamlit_build(argv[0], argv[1], argv[2], argv[3], array);
}
+void
+set_attribute_builder_const(ID id, const char *name)
+{
+ VALUE str = rb_str_new_cstr(name);
+ VALUE obj = rb_obj_freeze(str);
+
+ rb_gc_register_mark_object(str);
+ rb_const_set(mAttributeBuilder, id, obj);
+}
+
void
Init_hamlit(void)
{
@@ -544,11 +554,11 @@ Init_hamlit(void)
id_boolean_attributes = rb_intern("BOOLEAN_ATTRIBUTES");
id_xhtml = rb_intern("xhtml");
- rb_const_set(mAttributeBuilder, id_aria, rb_obj_freeze(rb_str_new_cstr("aria")));
- rb_const_set(mAttributeBuilder, id_data, rb_obj_freeze(rb_str_new_cstr("data")));
- rb_const_set(mAttributeBuilder, id_equal, rb_obj_freeze(rb_str_new_cstr("=")));
- rb_const_set(mAttributeBuilder, id_hyphen, rb_obj_freeze(rb_str_new_cstr("-")));
- rb_const_set(mAttributeBuilder, id_space, rb_obj_freeze(rb_str_new_cstr(" ")));
- rb_const_set(mAttributeBuilder, id_underscore, rb_obj_freeze(rb_str_new_cstr("_")));
+ set_attribute_builder_const(id_aria, "aria");
+ set_attribute_builder_const(id_data, "data");
+ set_attribute_builder_const(id_equal, "=");
+ set_attribute_builder_const(id_hyphen, "-");
+ set_attribute_builder_const(id_space, " ");
+ set_attribute_builder_const(id_underscore, "_");
}
#endif
Eh, never mind, it just failed. Let me try your patch.
Thanks for checking it quickly. I released it as v2.14.2.
This fixes the known issue with compaction as described in https://bugs.ruby-lang.org/issues/15626#Known-Issue.
Since Hamlit holds points to references to VALUE pointers, these VALUE pointers could move due to compaction. To avoid this, we pin these objects with
rb_gc_mark
.I was able to reproduce this issue intermittently via: