lfeng14 / llvm-note

0 stars 0 forks source link

原因分析 - mariadb软件包RISCV平台启动报错illegal instruction #3

Open lfeng14 opened 2 months ago

lfeng14 commented 2 months ago

现象

在RISCV机器上,使用llvm构建mariadb后,启动未成功。

systemctl restart mariadb

journalctl日志

# journalctl -xeu mariadb.service
May 30 12:39:50 openeuler-riscv64 mariadb-prepare-db-dir[5250]: Initializing MariaDB database
May 30 12:40:15 openeuler-riscv64 mariadb-prepare-db-dir[5318]: /usr/bin/mariadb-install-db: line 543:  5320 Ille>

软件包日志

# vi /var/log/mariadb/mariadb.log
240530 16:32:43 [ERROR] mysqld got signal 4 ;
Sorry, we probably made a mistake, and this is a bug.

如何找到哪个指令导致的illegal instruction

Thread 1 "mysqld" received signal SIGILL, Illegal instruction. rw_lock_x_lock_wait_func (lock=0x2aacf707f0, threshold=0, file_name=0x2aab8fa978 "/home/lkp/rpmbuild/BUILD/mariadb-10.5.24/storage/innobase/fsp/fsp0fsp.cc", line=559) at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/sync/sync0rw.cc:465 465 rw_lock_stats.rw_x_spin_round_count.add(n_spins); Missing separate debuginfos, use: dnf debuginfo-install Judy-1.0.5-19.mg2403.riscv64 libaio-0.3.113-9.mg2403.riscv64 libcap-2.69-3.mg2403.riscv64 libgcc-12.3.1-25.mg2403.riscv64 libgcrypt-1.10.2-1.mg2403.riscv64 libgpg-error-1.47-1.mg2403.riscv64 libstdc++-12.3.1-25.mg2403.riscv64 libxcrypt-4.4.36-2.mg2403.riscv64 lz4-1.9.4-2.mg2403.riscv64 openssl-libs-3.0.12-3.oe2403.riscv64 pcre2-10.42-6.mg2403.riscv64 sssd-client-2.9.4-2.mg2403.riscv64 systemd-libs-255-10.mg2403.riscv64 xz-libs-5.4.4-1.mg2403.riscv64 zlib-1.3.1-1.mg2403.riscv64 (gdb) bt

0 rw_lock_x_lock_wait_func (lock=0x2aacf707f0, threshold=0,

file_name=0x2aab8fa978 "/home/lkp/rpmbuild/BUILD/mariadb-10.5.24/storage/innobase/fsp/fsp0fsp.cc", line=559)
at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/sync/sync0rw.cc:465

1 0x0000002aab6941fc in rw_lock_x_lock_low (lock=0x2aacf707f0, pass=<optimized out>,

file_name=0x2aab8fa978 "/home/lkp/rpmbuild/BUILD/mariadb-10.5.24/storage/innobase/fsp/fsp0fsp.cc", line=559)
at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/sync/sync0rw.cc:506

2 0x0000002aab693ece in rw_lock_x_lock_func (lock=0x2aacf707f0, pass=0,

file_name=0x2aab8fa978 "/home/lkp/rpmbuild/BUILD/mariadb-10.5.24/storage/innobase/fsp/fsp0fsp.cc", line=559)
at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/sync/sync0rw.cc:661

3 0x0000002aab733e16 in pfs_rw_lock_x_lock_func (lock=0x2aacf707f0, pass=0, line=559,

file_name=<optimized out>) at include/sync0rw.inl:553

4 mtr_t::x_lock_space (this=0x3fffffcbd8, space=0x2aacf70768, line=559, file=<optimized out>)

at include/mtr0mtr.h:265

5 fsp_header_init (space=0x2aacf70768, size=768, mtr=0x3fffffcbd8)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/fsp/fsp0fsp.cc:559

6 0x0000002aab68d44a in srv_start (create_new_db=true)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/srv/srv0start.cc:1417

7 0x0000002aab5ca81a in innodb_init (p=<optimized out>)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/storage/innobase/handler/ha_innodb.cc:4055

8 0x0000002aab35be50 in ha_initialize_handlerton (plugin=0x2aac994110)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/handler.cc:651

9 0x0000002aab1bfd88 in plugin_do_initialize (plugin=0x2aac994110, state=@0x3fffffd39c: 4)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/sql_plugin.cc:1450

10 0x0000002aab1bf9b6 in plugin_initialize (tmp_root=0x3fffffd950, plugin=0x2aac994110,

argc=0x2aac0492f0 <remaining_argc>, argv=0x2aac952ee0, options_only=false)
at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/sql_plugin.cc:1503

11 0x0000002aab1bf6e2 in plugin_init (argc=0x2aac0492f0 <remaining_argc>, argv=0x2aac952ee0,

flags=<optimized out>) at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/sql_plugin.cc:1760

12 0x0000002aab10ee10 in init_server_components ()

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/mysqld.cc:4948

13 0x0000002aab10bb0a in mysqld_main (argc=55, argv=0x2aac952ee0)

at /usr/src/debug/mariadb-10.5.24-1.oe2403.riscv64/sql/mysqld.cc:5539

14 0x0000003ff7bd1958 in ?? () from /usr/lib64/libc.so.6

15 0x0000003ff7bd1a00 in __libc_start_main () from /usr/lib64/libc.so.6

16 0x0000002aab108f14 in _start ()

-  通过堆栈信息找到crash所在函数是rw_lock_x_lock_wait_func,但gdb断点不了。通过mariadb代码发现该函数内联了,取消内联后重新构建

```c
// UNIV_INLINE
void
rw_lock_x_lock_wait_func(
/*=====================*/
	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
#ifdef UNIV_DEBUG
	ulint		pass,	/*!< in: pass value; != 0, if the lock will
				be passed to another thread to unlock */
#endif
	lint		threshold,/*!< in: threshold to wait for */
	const char*	file_name,/*!< in: file name where lock requested */
	unsigned	line)	/*!< in: line where requested */
{
	HMT_low();
	HMT_medium();
	rw_lock_stats.rw_x_spin_round_count.add(n_spins);	// <----
}
```

- 通过gdb打断点、单步汇编后发现 **rdcycle**指令导致了illegal instruction
<img width="739" alt="image" src="https://github.com/lfeng14/llvm-note/assets/144297355/fd74ae74-7107-476c-a1cc-fe5c622f82d7">

#### 那什么是rdcycle指令
- rdcycle 是 RISC-V 指令集中用于读取处理器周期计数器值的指令。这里需要提到riscv的一个寄存器[mcounteren](https://five-embeddev.com/riscv-priv-isa-manual/Priv-v1.12/machine.html#sec:mcounteren),该寄存器低3位用于使能cycle类指令(第0位), time类指令(第1位),  instret类指令(第2位)。如果寄存器第0位值1则使能rdcycle指令。
![image](https://github.com/lfeng14/llvm-note/assets/144297355/2253f7ce-bb28-4ef5-9943-e6f78beb516e)

- 顺便补充下,假如riscv 用户态进程想执行rdcycle指令,会走什么流程呢 ? 比如mariadb进程执行rdcycle指令,先是系统调用,然后走ecall,最终到SBI执行,如果没有使能cycle类指令,那就报错illegal instruction。
![image](https://github.com/lfeng14/llvm-note/assets/144297355/27e04ffd-262a-4cb0-85c5-f0eb0bab7f71)

#### 哪里产生rdcycle指令 ?
- 通过查看sync0rw.cc的汇编(clang -S),发现rdcycle指令是来自于 include/my_rdtsc.h。

```c
static inline ulonglong my_timer_cycles(void)
{
/* === llvm支持__builtin_readcyclecounter */
# if __has_builtin(__builtin_readcyclecounter) && !defined (__aarch64__)
  return __builtin_readcyclecounter();
# elif defined _WIN32 || defined __i386__ || defined __x86_64__
  return __rdtsc();
# elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
  return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
#elif defined(__GNUC__) && defined(__ia64__)
  {
    ulonglong result;
    __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
    return result;
  }
#elif defined __GNUC__ && defined __powerpc__
  return __builtin_ppc_get_timebase();
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
  {
    ulonglong result;
    __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
    return result;
  }
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
  {
    union {
      ulonglong wholeresult;
      struct {
        ulong high;
        ulong low;
      } splitresult;
    } result;
    __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
    return result.wholeresult;
  }
#elif defined(__GNUC__) && defined(__s390__)
  /* covers both s390 and s390x */
  {
    ulonglong result;
    __asm__ __volatile__ ("stck %0" : "=Q" (result) : : "cc");
    return result;
  }
#elif defined(__GNUC__) && defined (__aarch64__)
  {
    ulonglong result;
    __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result));
    return result;
  }
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
  /* gethrtime may appear as either cycle or nanosecond counter */
  return (ulonglong) gethrtime();
#else
  /* =============== gcc未使用rdcycle指令,直接返回0,这里是不对的。 */
  return 0;
#endif
}
```

所以产生cycle指令是软件包自身代码行为,而gcc没有产生因为gcc不支持内建函数__builtin_readcyclecounter。

#### 为什么kernel没有使能cycle类指令 ?
-  riscv kernel 6.6出于安全上的考虑,增加对用户态访问csr指令如cycle类、instret类指令的管控。[riscv-kernel pr](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cc4c07c89aada16229084eeb93895c95b7eabaa3)

We used to unconditionnally expose the cycle and instret csrs to userspace, which gives rise to security concerns.

So now we only allow access to hw counters from userspace through the perf framework which will handle context switches, per-task events...etc. A sysctl allows to revert the behaviour to the legacy mode so that userspace applications which are not ready for this change do not break.

But the default value is to allow userspace only through perf: this will break userspace applications which rely on direct access to rdcycle. This choice was made for security reasons [1][2]: most of the applications which use rdcycle can instead use rdtime to count the elapsed time.

kernel关于csr指令寄存器的设置

解决方式

riscv架构上,将rdcycle指令切换为不受管控的rdtime指令,网上有相同案例请参考文末。

static inline ulonglong my_timer_cycles(void)
{
// __builtin_readsteadycounter llvm-project的main分支支持,17、18、19未同步
# if __has_builtin(__builtin_readsteadycounter) && defined (__riscv)   && defined (__llvm__)  // 增加该逻辑
  return __builtin_readsteadycounter();
# elif __has_builtin(__builtin_readcyclecounter) && !defined (__aarch64__)  
  return __builtin_readcyclecounter();
...

影响范围

软件包自身调用readcyclecounter指令,同时kernel 默认不使能该类指令,编译器又支持readcyclecounter后端指令时,这种情况会触发非法指令报错。

参考

lfeng14 commented 2 months ago

复现测试demo

#include <stdio.h>

#if defined(_WIN32)
# include <intrin.h>     /* declares __rdtsc() for MSVC */
#elif defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>  /* declares __rdtsc() for GCC/Clang */
#endif

/* Counter values are unsigned; a signed type would turn large tick counts
   into negative numbers. */
typedef unsigned long long ulonglong;

/* Compilers that do not provide __has_builtin would otherwise fail to
   preprocess the "#if __has_builtin(...)" test below. */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

/**
  Read the platform's raw cycle/tick counter.

  The first matching preprocessor branch is used. When the compiler offers
  __builtin_readcyclecounter (and the target is not aarch64) that builtin is
  called; this is deliberately left as-is because it is the code path this
  reproducer exists to exercise (the accompanying write-up traces the SIGILL
  on RISC-V to the rdcycle instruction generated from it).

  @return the counter value, or 0 when no branch matches the build target.
          The two fallback branches additionally print "a" / "b" to stdout so
          it is visible which one was compiled in.
*/
static inline ulonglong my_timer_cycles(void)
{
#if __has_builtin(__builtin_readcyclecounter) && !defined(__aarch64__)
  return __builtin_readcyclecounter();
#elif defined _WIN32 || defined __i386__ || defined __x86_64__
  return __rdtsc();
#elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
  return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
#elif defined(__GNUC__) && defined(__ia64__)
  {
    ulonglong result;
    __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
    return result;
  }
#elif defined __GNUC__ && defined __powerpc__
  return __builtin_ppc_get_timebase();
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
  {
    ulonglong result;
    __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
    return result;
  }
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
  {
    /* The 64-bit tick value is assembled from two machine words. */
    union {
      ulonglong wholeresult;
      struct {
        unsigned long high;
        unsigned long low;
      } splitresult;
    } result;
    __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
    return result.wholeresult;
  }
#elif defined(__GNUC__) && defined(__s390__)
  /* covers both s390 and s390x */
  {
    ulonglong result;
    __asm__ __volatile__ ("stck %0" : "=Q" (result) : : "cc");
    return result;
  }
#elif defined(__GNUC__) && defined(__aarch64__)
  {
    ulonglong result;
    __asm __volatile("mrs       %0, CNTVCT_EL0" : "=&r" (result));
    return result;
  }
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
  /* gethrtime may appear as either cycle or nanosecond counter */
  printf("a\n");
  return (ulonglong) gethrtime();
#else
  /* No counter source for this target: report 0. */
  printf("b\n");
  return 0;
#endif
}

/* Reproducer entry point: reads the counter once via my_timer_cycles()
   and prints it. */
int main(void) {
    /* Wide enough for the full counter value even where long is 32 bits. */
    unsigned long long cycles;

    /* Alternative ways to issue the same counter read, kept for reference:
     *   asm volatile ("rdcycle %0" : "=r" (cycles));   inline assembly
     *   __builtin_readcyclecounter();                   compiler builtin
     */
    cycles = (unsigned long long) my_timer_cycles();
    printf("Cycle count: %llu\n", cycles);

    return 0;
}