Open honggyukim opened 5 years ago
Here is gdb output.
# gdb -q --args `which uftrace` record -l /usr/bin/qml-runner --main hello.qml
Reading symbols from /usr/bin/uftrace...done.
(gdb) r
Starting program: /usr/bin/uftrace record -l /usr/bin/qml-runner --main hello.qml
warning: Unable to find libthread_db matching inferior's thread library, thread debugging will not be available.
[Attaching after process 9950 fork to child process 9958]
[New inferior 2 (process 9958)]
[Detaching after fork from parent process 9950]
[Inferior 1 (process 9950) detached]
warning: Unable to find libthread_db matching inferior's thread library, thread debugging will not be available.
process 9958 is executing new program: /usr/bin/qml-runner
warning: Unable to find libthread_db matching inferior's thread library, thread debugging will not be available.
...
Thread 2.1 "qml-runner" received signal SIGSEGV, Segmentation fault.
[Switching to LWP 9958]
0x76fe6940 in _dl_signal_cexception (errcode=0, exception=0x768248dc <QQmlMemoryScope::state>,
occasion=0x76ffe900 <__stack_chk_guard> "") at dl-error-skeleton.c:148
148 dl-error-skeleton.c: No such file or directory.
(gdb) bt
#0 0x76fe6940 in _dl_signal_cexception (errcode=0, exception=0x768248dc <QQmlMemoryScope::state>,
occasion=0x76ffe900 <__stack_chk_guard> "") at dl-error-skeleton.c:148
#1 0x7681dcd0 in ?? () from /usr/lib/libQt5Qml.so.5
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
Here is glibc source code.
$ dl-error-skeleton.c
...
60 #if DL_ERROR_BOOTSTRAP
61 /* This points to a function which is called when an continuable error is
62 received. Unlike the handling of `catch' this function may return.
63 The arguments will be the `errstring' and `objname'.
64
65 Since this functionality is not used in normal programs (only in ld.so)
66 we do not care about multi-threaded programs here. We keep this as a
67 global variable. */
68 static receiver_fct receiver;
69 #endif /* DL_ERROR_BOOTSTRAP */
...
129 #if DL_ERROR_BOOTSTRAP
130 void
131 _dl_signal_cexception (int errcode, struct dl_exception *exception,
132 const char *occasion)
133 {
134 if (__builtin_expect (GLRO(dl_debug_mask)
135 & ~(DL_DEBUG_STATISTICS|DL_DEBUG_PRELINK), 0))
136 _dl_debug_printf ("%s: error: %s: %s (%s)\n",
137 exception->objname, occasion,
138 exception->errstring, receiver ? "continued" : "fatal");
139
140 if (receiver)
141 {
142 /* We are inside _dl_receive_error. Call the user supplied
143 handler and resume the work. The receiver will still be
144 installed. */
145 (*receiver) (errcode, exception->objname, exception->errstring);
146 }
147 else
148 _dl_signal_exception (errcode, exception, occasion);
149 }
The possibly related code that call _dl_signal_cexception
is as follows:
$ cat dl-lookup.c
...
/* Search loaded objects' symbol tables for a definition of the symbol
UNDEF_NAME, perhaps with a requested version for the symbol.
We must never have calls to the audit functions inside this function
or in any function which gets called. If this would happen the audit
code might create a thread which can throw off all the scope locking. */
lookup_t
_dl_lookup_symbol_x (const char *undef_name, struct link_map *undef_map,
const ElfW(Sym) **ref,
struct r_scope_elem *symbol_scope[],
const struct r_found_version *version,
int type_class, int flags, struct link_map *skip_map)
{
...
/* Search the relevant loaded objects for a definition. */
for (size_t start = i; *scope != NULL; start = 0, ++scope)
{
int res = do_lookup_x (undef_name, new_hash, &old_hash, *ref,
¤t_value, *scope, start, version, flags,
skip_map, type_class, undef_map);
if (res > 0)
break;
if (__glibc_unlikely (res < 0) && skip_map == NULL)
{
/* Oh, oh. The file named in the relocation entry does not
contain the needed symbol. This code is never reached
for unversioned lookups. */
assert (version != NULL);
const char *reference_name = undef_map ? undef_map->l_name : "";
struct dl_exception exception;
/* XXX We cannot translate the message. */
_dl_exception_create_format
(&exception, DSO_FILENAME (reference_name),
"symbol %s version %s not defined in file %s"
" with link time reference%s",
undef_name, version->name, version->filename,
res == -2 ? " (no version symbols)" : "");
_dl_signal_cexception (0, &exception, N_("relocation error"));
_dl_exception_free (&exception);
*ref = NULL;
return 0;
}
}
if (__glibc_unlikely (current_value.s == NULL))
{
if ((*ref == NULL || ELFW(ST_BIND) ((*ref)->st_info) != STB_WEAK)
&& !(GLRO(dl_debug_mask) & DL_DEBUG_UNUSED))
{
/* We could find no value for a strong reference. */
const char *reference_name = undef_map ? undef_map->l_name : "";
const char *versionstr = version ? ", version " : "";
const char *versionname = (version && version->name
? version->name : "");
struct dl_exception exception;
/* XXX We cannot translate the message. */
_dl_exception_create_format
(&exception, DSO_FILENAME (reference_name),
"undefined symbol: %s%s%s",
undef_name, versionstr, versionname);
_dl_signal_cexception (0, &exception, N_("symbol lookup error"));
_dl_exception_free (&exception);
}
*ref = NULL;
return 0;
}
...
}
Here is verbose log near the segfault point.
# uftrace record --debug-domain plthook:3 --debug-domain mcount:3 --debug-domain uftrace:3 \
-l /usr/bin/qml-runner --main hello.qml
...
mcount: rstack[8] ENTRY 75d6b5dc [38/1903]
mcount: rstack[8] EXIT 75d6b5dc
plthook: [idx: 36] exit 75d733d9: malloc (resolved addr: 75e1b7e9)
mcount: <8> exit 75d6b54c
mcount: task 13804 recorded 16 bytes (record count = 1)
mcount: rstack[7] EXIT 75d6b54c
plthook: [idx: 24] exit 75d6dd93: dl_iterate_phdr (resolved addr: 75e83019)
plthook: [idx: 14] enter 75d6b4d4: sigprocmask@plt (mod: 1d79d20)
mcount: <7> enter 75d6b4d4
mcount: tr->flags: 0, filter mode: 0, count: 0/0, depth: 1017
mcount: <8> exit 75d6b4d4
mcount: task 13804 recorded 32 bytes (record count = 2)
mcount: rstack[7] ENTRY 75d6b4d4
mcount: rstack[7] EXIT 75d6b4d4
plthook: [idx: 14] exit 75d6dd9d: sigprocmask (resolved addr: 75debd35)
uftrace: MSG DLOPEN: 13804: 0x71333000 (null)
uftrace: MSG DLOPEN: 13804: 0x71322000 (null)
uftrace: MSG DLOPEN: 13804: 0x712f0000 (null)
uftrace: MSG DLOPEN: 13804: 0x712e9000 (null)
uftrace: MSG DLOPEN: 13804: 0x712c2000 (null)
uftrace: MSG DLOPEN: 13804: 0x70a11000 (null)
uftrace: MSG DLOPEN: 13804: 0x71433000 (null)
uftrace: MSG DLOPEN: 13804: 0x7140d000 (null)
uftrace: MSG DLOPEN: 13804: 0x71380000 (null)
uftrace: MSG DLOPEN: 13804: 0x71378000 (null)
uftrace: MSG DLOPEN: 13804: 0x76004000 (null)
uftrace: MSG DLOPEN: 13804: 0x71364000 (null)
uftrace: MSG DLOPEN: 13804: 0x71333000 (null)
uftrace: MSG DLOPEN: 13804: 0x71322000 (null)
uftrace: MSG DLOPEN: 13804: 0x712f0000 (null)
uftrace: MSG DLOPEN: 13804: 0x712e9000 (null)
uftrace: MSG DLOPEN: 13804: 0x712c2000 (null)
uftrace: MSG DLOPEN: 13804: 0x70a11000 (null)
uftrace: MSG DLOPEN: 13804: 0x6ff05000 (null)
uftrace: MSG DLOPEN: 13804: 0x70a07000 (null)
uftrace: MSG DLOPEN: 13804: 0x6ec4e000 (null)
uftrace: MSG START: /uftrace-bf0d44f9ef11f6f2-13827-000
uftrace: MSG TASK_START : 13804/13827
uftrace: MSG START: /uftrace-bf0d44f9ef11f6f2-13828-000
uftrace: MSG TASK_START : 13804/13828
uftrace: MSG START: /uftrace-bf0d44f9ef11f6f2-13829-000
uftrace: MSG TASK_START : 13804/13829
uftrace: MSG END : /uftrace-bf0d44f9ef11f6f2-13804-002
uftrace: MSG START: /uftrace-bf0d44f9ef11f6f2-13804-000
uftrace: all process/thread exited
child terminated by signal: 11: Segmentation fault
uftrace: fill header (metadata) info in uftrace.data/info
uftrace: stop writer thread 0
uftrace: reading uftrace.data/task.txt file
uftrace: flushing /uftrace-bf0d44f9ef11f6f2-13819-000
uftrace: flushing /uftrace-bf0d44f9ef11f6f2-13820-000
uftrace: make a new write buffer
uftrace: flushing /uftrace-bf0d44f9ef11f6f2-13821-000
uftrace: make a new write buffer
uftrace: flushing /uftrace-bf0d44f9ef11f6f2-13822-000
uftrace: make a new write buffer
...
As I described above, it always gets crashed at the same place.
# uftrace replay
...
2.500 us [ 15318] | free(0x1157f38);
2.917 us [ 15318] | malloc(28) = 0x11574b0;
72.171 ms [ 15318] | } /* dl_iterate_phdr */
7.708 us [ 15318] | sigprocmask(SIG_SETMASK, 0x717cbb7c, 0) = 0;
uftrace stopped tracing with remaining functions
================================================
task: 15318
[6] _Uarm_step
[5] dlsym
[4] qt_linux_find_symbol_sys
[3] QQmlMemoryScope::doOpenLibrary
[2] QV8Engine::QV8Engine
[1] QJSEngine::QJSEngine
[0] QQmlEngine::QQmlEngine
task: 15365
[3] g_main_context_iteration
[2] QEventLoop::processEvents
[1] QEventLoop::exec
[0] QThread::exec
Hmm.. This problem looks like related to recover problem same as #722.
It seems that we cannot avoid this kind of problem even with -finstrument-functions
because plthook always hijacks the return address regardless of the compilation flag.
We need another way to implement auto-recover in ARM as well.
I tested it again.
# ./uftrace --version
uftrace v0.9.2-127-gc7145 ( perf sched )
# ./uftrace record -L. -la -H 10.177.240.176 /usr/bin/qml-runner --main hello.qml
...
qttestability successfully loaded
child terminated by signal: 11: Segmentation fault
Here is gdb test result.
# gdb -q --args ./uftrace record -L. -l /usr/bin/qml-runner --main hello.qml
Reading symbols from ./uftrace...done.
(gdb) r
Starting program: /home/root/uftrace record -L. -l /usr/bin/qml-runner --main hello.qml
...
Thread 2.1 "qml-runner" received signal SIGSEGV, Segmentation fault.
[Switching to LWP 20114]
0xf77d792c in _dl_signal_cexception (errcode=8598144, exception=0x8334bc, occasion=0x8334b4 "\331XO\365") at dl-error-skeleton.c:148
148 dl-error-skeleton.c: No such file or directory.
(gdb) bt
#0 0xf77d792c in _dl_signal_cexception (errcode=8598144, exception=0x8334bc, occasion=0x8334b4 "\331XO\365") at dl-error-skeleton.c:148
#1 0x006e9150 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
It gets segfault when running
qml-runner
in ARM 32-bit environment.The replay output is as follows:
As above,
dlsym
and_Uarm_step
might be some problematic points. It might also be related to auto recover issue as well.