Closed estibi closed 12 years ago
Please provide the stacks of the kernel threads. Also, you are going to want to use the latest illumos bits; various bugs triggered by QEMU/KVM have since been fixed there.
illumos-gate debug build r13629
100563: qemu-kvm -enable-kvm -nographic -smp 2 -m 8192 -vnc 0.0.0.0:52 -no-hpe ----------------- lwp# 1 / thread# 1 -------------------- fffffd7fff2b3e2a pollsys (fffffd7fffdf9660, 4, fffffd7fffdf9760, 0) fffffd7fff23c504 pselect () + 18c fffffd7fff23ca18 select () + 70 00000000004bcf1f main_loop_wait () + 19f 00000000004cf70b kvm_main_loop () + bb 00000000004be406 main () + 9a6 00000000004b031c _start () + 6c ----------------- lwp# 2 / thread# 2 -------------------- fffffd7fff2b349a sigtimedwait (f75cc0, fffffd7ffee7ecb0, 0) fffffd7fff29a144 sigwaitinfo () + c 00000000004db088 sigwait_compat () + 48 fffffd7fff2aaf24 _thrp_setup () + bc fffffd7fff2ab1f0 _lwp_start () ----------------- lwp# 3 / thread# 3 -------------------- fffffd7fff2b3baa ioctl (b, 2000ae80, 0) 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2aaf24 _thrp_setup () + bc fffffd7fff2ab1f0 _lwp_start () ----------------- lwp# 4 / thread# 4 -------------------- fffffd7fff2b3baa ioctl () + a 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2aaf24 _thrp_setup () + bc fffffd7fff2ab1f0 _lwp_start () ----------------- lwp# 5 / thread# 5 -------------------- fffffd7fff2b349a sigtimedwait (2011b4ca0, fffffd7ffe540cb0, 0) fffffd7fff29a144 sigwaitinfo () + c 00000000004db088 sigwait_compat () + 48 fffffd7fff2aaf24 _thrp_setup () + bc fffffd7fff2ab1f0 _lwp_start ()
100563 root 0.0 100 0.0 0.0 0.0 0.0 0.0 0.0 2 17 19 0 qemu-kvm-sys/4 100567 root 0.0 4.0 0.0 0.0 0.0 0.0 95 0.6 9K 12 1K 0 qemu-kvm-sys/3 100564 root 0.0 3.6 0.0 0.0 0.0 0.0 96 0.7 9K 1 829 0 qemu-kvm-sys/3 100567 root 0.0 2.1 0.0 0.0 0.0 0.0 97 0.6 5K 17 17 0 qemu-kvm-sys/5 100563 root 0.0 1.8 0.0 0.0 0.0 0.0 98 0.6 9K 0 0 0 qemu-kvm-sys/3 100567 root 0.0 1.4 0.0 0.0 0.0 0.0 98 0.2 5K 11 11 0 qemu-kvm-sys/6 100567 root 0.0 1.3 0.0 0.0 0.0 0.0 98 0.2 5K 13 73 6 qemu-kvm-sys/4 100564 root 0.0 1.3 0.0 0.0 0.0 0.0 99 0.2 5K 0 0 0 qemu-kvm-sys/4 100567 root 0.1 0.1 0.0 0.0 0.0 0.0 100 0.0 181 0 1K 0 qemu-kvm-sys/1 100563 root 0.1 0.1 0.0 0.0 0.0 0.0 100 0.0 177 4 1K 0 qemu-kvm-sys/1 100816 root 0.1 0.0 0.0 0.0 0.0 0.0 100 0.0 1 1 60 0 intrd/1 102018 root 0.0 0.1 0.0 0.0 0.0 0.0 100 0.0 31 0 428 0 prstat/1
::stacks -v mdb: stacks: processing kernel threads mdb: stacks: 111 unique stacks / 1492 threads mdb: stacks: done THREAD STATE SOBJ COUNT ffffff0170011c40 SLEEP CV 550 swtch+0x1e6 cv_wait+0x7f taskq_thread_wait+0x94 taskq_thread+0x324 thread_start+8
ffffff345df4cc40 SLEEP CV 165 swtch+0x1e6 cv_wait_sig_swap_core+0x183 cv_wait_sig_swap+0x18 cv_waituntil_sig+0x13c lwp_park+0x157 syslwp_park+0x31 _sys_sysenter_post_swapgs+0x237
ffffff017009bc40 FREE
::ps ! grep 100563 R 100563 1 100561 100561 0 0x42000000 ffffff34964250d8 qemu-kvm-system- ffffff34964250d8::walk thread | ::findstack -v stack pointer for thread ffffff34640ff860: ffffff01723b6c50 [ ffffff01723b6c50 _resume_from_idle+0xf1() ] ffffff01723b6c80 swtch+0x1e6() ffffff01723b6d10 cv_timedwait_sig_hires+0x206(ffffff3490f75b42, ffffff3490f75b08, 3b9aca00, f4240, 0) ffffff01723b6d70 cv_reltimedwait_sig+0x77(ffffff3490f75b42, ffffff3490f75b08, 64, 2) ffffff01723b6da0 cv_relwaituntil_sig+0x25(ffffff3490f75b42, ffffff3490f75b08, ffffff01723b6e08, 2) ffffff01723b6e60 poll_common+0x4c1(fffffd7fffdf9660, 4, ffffff01723b6e80, 0) ffffff01723b6ef0 pollsys+0xea(fffffd7fffdf9660, 4, fffffd7fffdf9760, 0) ffffff01723b6f00 sys_syscall+0x26e() stack pointer for thread ffffff3464343c00: ffffff017232ecd0 [ ffffff017232ecd0 _resume_from_idle+0xf1() ] ffffff017232ed00 swtch+0x1e6() ffffff017232ed60 cv_wait_sig_swap_core+0x183(ffffff3464343dee, ffffff3430528c40, 0) ffffff017232ed80 cv_wait_sig_swap+0x18(ffffff3464343dee, ffffff3430528c40) ffffff017232edf0 cv_waituntil_sig+0x13c(ffffff3464343dee, ffffff3430528c40, 0, 0) ffffff017232eef0 sigtimedwait+0x204(f75cc0, fffffd7ffee7ecb0, 0) ffffff017232ef00 sys_syscall+0x26e() stack pointer for thread ffffff346fab30c0: ffffff01723bc3a0 [ ffffff01723bc3a0 _resume_from_idle+0xf1() ] ffffff01723bc3d0 swtch+0x1e6() ffffff01723bc430 cv_wait_sig_swap_core+0x183(ffffff3496f57050, ffffff3496f57048, 0) ffffff01723bc450 cv_wait_sig_swap+0x18(ffffff3496f57050, ffffff3496f57048) ffffff01723bc480 kvm_vcpu_block+0x6b() ffffff01723bc4a0 __vcpu_run+0x84() ffffff01723bc4e0 kvm_arch_vcpu_ioctl_run+0x110() ffffff01723bccf0 kvm_ioctl+0x119() ffffff01723bcd30 cdev_ioctl+0x45(10e00000004, 2000ae80, 0, 202003, ffffff34936d7d28, ffffff01723bce14) ffffff01723bcd70 spec_ioctl+0x5a(ffffff34936d3000, 2000ae80, 0, 202003, ffffff34936d7d28, ffffff01723bce14, 0) ffffff01723bcdf0 fop_ioctl+0x7b(ffffff34936d3000, 2000ae80, 0, 202003, ffffff34936d7d28, 
ffffff01723bce14, 0) ffffff01723bcef0 ioctl+0x18e(b, 2000ae80, 0) ffffff01723bcf00 sys_syscall+0x26e() stack pointer for thread ffffff34600d24a0: ffffff01722dc370 [ ffffff01722dc370 _resume_from_idle+0xf1() ] ffffff01722dc480 vcpu_enter_guest+0x35e() ffffff01722dc4a0 __vcpu_run+0x77() ffffff01722dc4e0 kvm_arch_vcpu_ioctl_run+0x110() ffffff01722dccf0 kvm_ioctl+0x119() ffffff01722dcd30 cdev_ioctl+0x45(10e00000005, 2000ae80, 0, 202003, ffffff34936d7d28, ffffff01722dce14) ffffff01722dcd70 spec_ioctl+0x5a(ffffff3496a90600, 2000ae80, 0, 202003, ffffff34936d7d28, ffffff01722dce14, 0) ffffff01722dcdf0 fop_ioctl+0x7b(ffffff3496a90600, 2000ae80, 0, 202003, ffffff34936d7d28, ffffff01722dce14, 0) ffffff01722dcef0 ioctl+0x18e(c, 2000ae80, 0) ffffff01722dcf00 sys_syscall+0x26e() stack pointer for thread ffffff345fdb6c00: ffffff017242ecd0 [ ffffff017242ecd0 _resume_from_idle+0xf1() ] ffffff017242ed00 swtch+0x1e6() ffffff017242ed60 cv_wait_sig_swap_core+0x183(ffffff345fdb6dee, ffffff3430528c40, 0) ffffff017242ed80 cv_wait_sig_swap+0x18(ffffff345fdb6dee, ffffff3430528c40) ffffff017242edf0 cv_waituntil_sig+0x13c(ffffff345fdb6dee, ffffff3430528c40, 0, 0) ffffff017242eef0 sigtimedwait+0x1eb(2011b4ca0, fffffd7ffe540cb0, 0) ffffff017242ef00 sys_syscall+0x26e()
From the kernel stacks, it looks like you have threads that are making forward progress and are running the VM. Also, why do you have the -no-hpe option? Are you trying to pass -no-hpet? If so, that shouldn't be necessary with the newest bits. As an added sanity check, what does kvmstat say is going on, and does VNC show that the guest hasn't panicked?
Updates:
illumos-gate r13632 non debug
illumos-kvm 76be9d4 illumos-kvm-cmd 4f48eea
I have multiple kvm instances here.
PID USERNAME USR SYS TRP TFL DFL LCK SLP LAT VCX ICX SCL SIG PROCESS/LWPID 13540 root 0.2 100 0.0 0.0 0.0 0.0 0.0 0.0 0 0 5 0 qemu-kvm-sys/3 17412 root 26 63 0.0 0.0 0.0 0.0 11 0.0 48 0 806 0 prstat/1 394 root 3.6 6.4 0.0 0.0 0.0 0.0 90 0.0 48 0 288 0 nscd/6 16972 root 0.0 5.2 0.0 0.0 0.0 0.0 94 0.5 10 0 0 0 qemu-kvm-sys/3 16997 root 0.0 4.0 0.0 0.0 0.0 0.0 95 0.6 5 0 0 0 qemu-kvm-sys/3 17000 root 0.0 3.6 0.0 0.0 0.0 0.0 96 0.5 10 0 0 0 qemu-kvm-sys/3 12424 root 0.0 3.6 0.0 0.0 0.0 0.0 96 0.4 10 0 0 0 qemu-kvm-sys/3
As you can see, 13540 consumes 100% SYS CPU time, but this VM actually works fine, and it's idle.
/usr/bin/amd64/qemu-kvm-system-x86_64 -enable-kvm -nographic -smp 1 -m 8192 -vnc 0.0.0.0:23 -no-hpet -no-acpi -net 'nic,vlan=0,name=net0,macaddr=2:8:20:7b:c8:6b,model=virtio' -net 'vnic,vlan=0,name=XXXvm03,ifname=XXXvm03,macaddr=2:8:20:7b:c8:6b' -drive 'file=/dev/zvol/rdsk/data/XXXvm03,index=0,media=disk,if=virtio' -cpu qemu64 -daemonize
Note that I have replaced the disk and NIC names here.
This one hung at boot:
/usr/bin/amd64/qemu-kvm-system-x86_64 -enable-kvm -nographic -smp 1 -m 10240 -vnc 0.0.0.0:51 -no-hpet -no-acpi -net 'nic,vlan=0,name=net0,macaddr=2:8:20:ec:65:7b,model=virtio' -net 'vnic,vlan=0,name=XXXvm01,ifname=XXXvm01,macaddr=2:8:20:ec:65:7b' -drive 'file=/dev/zvol/rdsk/data/XXXvm01,index=0,media=disk,if=virtio' -daemonize
pid vcpu | exits : haltx irqx irqwx iox mmiox | irqs emul eptv 12025 0 | 7035 : 2000 35 0 3000 0 | 2000 2000 0 12424 0 | 7015 : 1999 15 1 3000 0 | 2000 2000 0 12428 0 | 7037 : 2000 35 0 3001 0 | 2001 2001 0 13540 0 | 7017 : 1855 12 146 3003 0 | 2001 2001 0 14057 0 | 7050 : 1839 51 160 3000 0 | 2000 2000 0 14063 0 | 7025 : 2000 25 0 3000 0 | 2000 2000 0 16972 0 | 7009 : 1701 4 300 3003 0 | 2001 2001 0 16985 0 | 7071 : 2000 48 1 3005 0 | 2002 2002 8 16991 0 | 2008 : 1000 8 0 0 0 | 1000 1000 0 16994 0 | 7033 : 2003 17 0 3009 0 | 2004 2004 0 16997 0 | 7014 : 2000 14 0 3000 0 | 2000 2000 0 17000 0 | 7050 : 2003 22 0 3009 0 | 2004 2004 6 pid vcpu | exits : haltx irqx irqwx iox mmiox | irqs emul eptv 12025 0 | 7031 : 2001 24 0 3003 0 | 2001 2001 0 12424 0 | 7015 : 2000 15 0 3000 0 | 2000 2000 0 12428 0 | 7020 : 1694 17 307 3002 0 | 2000 2000 0 13540 0 | 7069 : 1879 69 121 3000 0 | 2000 2000 0 14057 0 | 7001 : 1999 1 1 3000 0 | 2000 2000 0 14063 0 | 7012 : 2000 12 0 3000 0 | 2000 2000 0 16972 0 | 7032 : 1999 16 1 3000 0 | 2000 2000 9 16985 0 | 7029 : 2002 16 0 3008 0 | 2003 2003 0 16991 0 | 2003 : 1000 3 0 0 0 | 1000 1000 0 16994 0 | 7015 : 2001 3 1 3007 0 | 2003 2003 0 16997 0 | 7014 : 2000 14 0 3000 0 | 2000 2000 0 17000 0 | 7032 : 2002 20 0 3007 0 | 2003 2003 0 pid vcpu | exits : haltx irqx irqwx iox mmiox | irqs emul eptv 12025 0 | 7114 : 2000 114 0 3000 0 | 2000 2000 0 12424 0 | 7013 : 1999 11 0 3000 0 | 2000 2000 1 12428 0 | 7005 : 2000 5 0 3000 0 | 2000 2000 0 13540 0 | 7010 : 2000 8 0 3000 0 | 2000 2000 0 14057 0 | 6980 : 1963 16 19 3000 0 | 1982 1982 0 14063 0 | 7061 : 1841 61 155 3003 0 | 2001 2001 0 16972 0 | 7005 : 2000 5 0 3000 0 | 2000 2000 0 16985 0 | 7124 : 1597 117 403 3005 0 | 2002 2002 0 16991 0 | 2005 : 1000 5 0 0 0 | 1000 1000 0 16994 0 | 7020 : 2002 4 1 3009 0 | 2004 2004 0 16997 0 | 7008 : 2000 8 0 3000 0 | 2000 2000 0 17000 0 | 7064 : 2003 48 0 3009 0 | 2004 2004 0 pid vcpu | exits : haltx irqx irqwx iox mmiox | irqs emul eptv 12025 0 | 7020 : 2000 
20 0 3000 0 | 2000 2000 0 12424 0 | 7026 : 2001 17 0 3006 0 | 2002 2002 0 12428 0 | 7007 : 2000 7 0 3000 0 | 2000 2000 0 13540 0 | 7023 : 2000 23 0 3000 0 | 2000 2000 0 14057 0 | 7031 : 2000 31 0 3000 0 | 2000 2000 0 14063 0 | 7133 : 1698 136 299 3000 0 | 2000 2000 0 16972 0 | 7006 : 2000 6 0 3000 0 | 2000 2000 0 16985 0 | 7027 : 2001 19 0 3005 0 | 2002 2002 0 16991 0 | 2004 : 1000 4 0 0 0 | 1000 1000 0 16994 0 | 7015 : 2001 3 1 3007 0 | 2003 2003 0 16997 0 | 7008 : 2000 8 0 3000 0 | 2000 2000 0 17000 0 | 7032 : 2002 20 0 3007 0 | 2003 2003 0 ^C
CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 25 0 20 468 157 2467 1 37 75 0 415 0 4 0 96 1 14 0 16 2275 1111 1787 2 38 70 0 542 0 5 0 95 2 9 0 15 4040 1997 2252 2 27 64 0 299 0 5 0 95 3 9 0 16 2188 1068 2510 2 35 72 0 539 0 4 0 96 4 12 0 37 3980 1961 4479 7 43 67 0 414 0 10 0 90 5 9 0 33 2300 1123 2801 5 37 67 0 542 0 18 0 81 6 9 0 38 4228 2213 4470 5 36 111 0 468 0 9 0 91 7 10 0 35 7795 3864 5816 7 48 91 0 481 0 11 0 89 8 8 0 16 3996 1978 2589 3 29 65 0 331 0 5 0 95 9 6 0 17 3901 1923 2225 2 25 60 0 272 0 4 0 96 10 7 0 15 1954 958 2585 3 29 67 0 351 0 4 0 96 11 7 0 11 535 191 1308 1 20 58 0 275 0 2 0 98 12 8 0 34 2180 1066 3537 4 36 76 0 596 0 12 0 87 13 8 0 28 4146 2045 2861 4 30 63 0 435 0 20 0 80 14 9 0 38 4174 2057 4033 8 47 78 0 574 0 10 0 90 15 8 0 27 2033 989 4112 6 35 62 0 338 0 10 0 90 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 13 6131 3005 2162 0 50 4 0 720 0 4 0 96 1 0 0 12 4094 2026 1662 1 62 9 0 828 0 2 0 98 2 0 0 13 65 15 2953 2 35 6 0 102 0 8 0 92 3 0 0 13 2196 1068 1928 7 61 9 0 369 0 3 0 97 4 0 0 67 4103 1986 3340 22 106 0 0 730 1 8 0 91 5 0 0 0 2132 1039 0 0 0 0 0 0 0 100 0 0 6 0 0 58 306 185 4246 20 96 7 0 393 0 19 0 81 7 0 0 30 6102 3003 2932 12 55 1 0 437 0 47 0 53 8 0 0 25 240 88 1452 4 51 22 0 284 1 2 0 97 9 1 0 17 174 52 1550 1 40 3 0 589 1 2 0 97 10 0 0 12 2035 1005 3347 0 40 8 0 288 0 5 0 95 11 0 0 12 2036 1003 3384 3 30 1 0 196 1 4 0 95 12 0 0 74 4227 2038 4032 18 98 1 0 567 0 45 0 55 13 0 0 97 6153 3003 4104 20 137 29 0 372 0 16 0 84 14 0 0 48 4123 1994 2564 18 77 0 0 557 0 52 0 48 15 0 0 44 4078 1989 2331 8 64 31 0 362 0 47 0 53 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 22 6138 3007 2969 0 49 16 0 109 0 5 0 95 1 0 0 23 4040 1989 2497 4 81 18 0 1053 1 4 0 95 2 0 0 13 77 19 1947 3 44 9 0 183 0 7 0 93 3 0 0 25 2201 1078 1594 2 50 21 0 538 0 3 0 97 4 0 0 102 4298 2025 5394 29 140 3 0 517 0 6 0 94 5 0 0 8 2157 1055 878 3 17 0 0 171 0 77 0 23 
6 0 0 30 268 166 4332 12 59 24 0 321 0 6 0 94 7 0 0 37 6010 2972 4083 8 64 1 0 242 1 8 0 91 8 0 0 12 263 97 1456 3 56 18 0 457 0 3 0 97 9 4 0 9 377 73 925 0 37 6 0 692 0 1 0 99 10 0 0 21 2029 994 3615 2 51 15 0 600 1 4 0 95 11 0 0 17 2047 1002 3687 3 32 13 0 156 0 4 0 96 12 0 0 62 4271 2073 4474 23 116 3 0 729 0 5 0 95 13 0 0 79 6192 3004 4393 14 116 5 0 621 0 28 0 72 14 0 0 20 4145 2019 1092 18 30 7 0 536 0 80 0 20 15 0 0 0 4106 2008 0 5 0 1 0 461 1 99 0 0 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 19 6129 2993 2112 3 64 179 0 593 1 4 0 95 1 0 0 29 4076 2007 3363 5 93 146 0 501 0 5 0 95 2 0 0 15 77 20 2947 4 55 127 0 125 0 9 0 91 3 0 0 10 2170 1054 1083 7 74 182 0 575 0 2 0 98 4 0 0 53 4097 1975 4537 23 108 108 0 545 0 6 0 94 5 0 0 41 2230 1062 3627 14 101 98 0 832 0 4 0 96 6 0 0 23 392 333 4240 15 53 174 0 188 0 7 0 93 7 0 0 34 6063 3002 3447 11 60 60 0 98 0 7 0 93 8 0 0 12 248 82 1294 5 68 136 0 698 0 1 0 99 9 0 0 8 282 68 1055 1 48 66 0 574 0 2 0 98 10 0 0 18 2064 1013 3544 1 49 93 0 145 0 4 0 96 11 0 0 19 2039 1002 3551 4 48 98 0 90 0 5 0 95 12 0 0 39 4182 2047 3941 16 91 102 0 679 0 5 0 95 13 0 0 45 6103 3001 4857 14 99 103 0 568 0 5 0 95 14 0 0 0 4115 2014 0 6 0 1 0 461 0 100 0 0 15 0 0 0 4102 2008 0 6 0 0 0 441 0 100 0 0 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 18 6039 2950 2299 5 51 43 0 255 0 4 0 96 1 0 0 25 4104 2025 2997 3 60 3 0 569 0 3 0 97 2 0 0 9 87 24 1742 2 33 2 0 274 0 6 0 94 3 0 0 13 2237 1081 1213 2 57 1 0 514 1 1 0 98 4 0 0 79 4184 2024 4798 16 118 0 0 484 0 5 0 95 5 0 0 30 2179 1060 4087 12 69 6 0 752 0 4 0 96 6 0 0 17 86 37 2546 3 39 15 0 175 0 5 0 95 7 0 0 17 6030 2999 4038 6 31 13 0 79 0 7 0 93 8 0 0 14 249 79 2029 4 52 7 0 586 0 3 0 97 9 0 0 2 234 66 460 1 30 1 0 904 0 0 0 100 10 0 0 13 2057 1004 3006 5 47 25 0 453 0 4 0 96 11 0 0 10 2058 1012 3655 2 24 4 0 117 0 5 0 95 12 0 0 33 4099 2018 2955 12 62 2 0 509 1 4 0 95 13 0 0 69 6112 2989 5552 13 100 2 0 640 1 6 0 93 14 
0 0 0 4115 2013 0 5 0 0 0 464 0 100 0 0 15 0 0 0 4099 2005 2 5 0 0 0 465 0 100 0 0
Loading modules: [ unix genunix specfs dtrace mac cpu.generic uppc pcplusmp scsi_vhci zfs mpt mpt_sas sd ip hook neti sockfs arp usba uhci stmf stmf_sbd fctl md lofs random idm crypto cpc fcp fcip ufs logindmux ptm sppp nsmb smbsrv nfs kvm ]
::stacks -v mdb: stacks: processing kernel threads mdb: stacks: 127 unique stacks / 2190 threads mdb: stacks: done THREAD STATE SOBJ COUNT ffffff0170011c40 SLEEP CV 578 swtch+0x145 cv_wait+0x61 taskq_thread_wait+0x84 taskq_thread+0x308 thread_start+8
ffffff3289bb1420 SLEEP CV 200 swtch+0x145 cv_wait_sig_swap_core+0x174 cv_wait_sig_swap+0x18 cv_waituntil_sig+0x13c lwp_park+0x157 syslwp_park+0x31 _sys_sysenter_post_swapgs+0x149
ffffff0171db7c40 SLEEP CV 174 swtch+0x145 cv_wait+0x61 mac_soft_ring_worker+0xb0 thread_start+8
ffffff3291ad9060 SLEEP CV 127 swtch+0x145 cv_timedwait_sig_hires+0x1e9 cv_waituntil_sig+0xba nanosleep+0x120 _sys_sysenter_post_swapgs+0x149
ffffff0171dabc40 SLEEP CV 116 swtch+0x145 cv_wait+0x61 mac_srs_worker+0x1b4 thread_start+8
ffffff017009bc40 FREE
Okay, I should have noticed this earlier, but one of the things that we fixed a little while back, and which is hopefully what is causing your problems, is related to the presence of -no-hpet and -no-acpi. There have been known boot and timer issues with those options, which is what inspired us to fix those issues in QEMU as well as to get multiticks there. For the systems that fail to boot, it'd still be extremely helpful if you could get a VNC screenshot or something like that, so we know where they are failing to boot.
Unrelated: I do notice that C-states seem to be on the scene, which have been known to cause general illumos problems; you may be happier with them disabled in the BIOS.
Screenshot attached.
http://www.dropbox.com/gallery/7506135/1/illumos-kvm-cmd%20%238?h=9ee6be
On Tue, Mar 13, 2012 at 4:33 PM, Robert Mustacchi reply@reply.github.com wrote:
Okay, I should have noticed this earlier, but one of the things that we fixed a little while back, and which is hopefully what is causing your problems, is related to the presence of -no-hpet and -no-acpi. There have been known boot and timer issues with those options, which is what inspired us to fix those issues in QEMU as well as to get multiticks there. For the systems that fail to boot, it'd still be extremely helpful if you could get a VNC screenshot or something like that, so we know where they are failing to boot.
Unrelated: I do notice that C-states seem to be on the scene, which have been known to cause general illumos problems; you may be happier with them disabled in the BIOS.
Reply to this email directly or view it on GitHub: https://github.com/joyent/illumos-kvm-cmd/issues/8#issuecomment-4475736
Piotr Jasiukajtis
I'm able to reproduce the guest's IO-APIC panic without the -no-hpet flag using a simple shell script (I was paranoid about SMF in the first place).
I can see some symbols added by 'HVM-711 need an interval timer-based alarm timer backend' in the live image of running qemu-kvm process, so I think we have at least right bits in the right place.
Loading modules: [ ld.so.1 libc.so.1 ]
::nm -D ! grep multitick 0x0000000000541b20|0x00000000000002b4|FUNC |LOCL |0x0 |17 |multiticks_start_timer 0x0000000000541ad0|0x000000000000004e|FUNC |LOCL |0x0 |17 |multiticks_stop_timer 0x0000000000541de0|0x0000000000000492|FUNC |LOCL |0x0 |17 |multiticks_rearm_timer 0x0000000000754918|0x0000000000000004|OBJT |GLOB |0x0 |30 |multiticks_tolerance_jitter 0x000000000075491c|0x0000000000000004|OBJT |GLOB |0x0 |30 |multiticks_enabled 0x0000000000754910|0x0000000000000008|OBJT |GLOB |0x0 |30 |multiticks_tolerance_interval 0x0000000000754900|0x0000000000000004|OBJT |GLOB |0x0 |30 |multiticks_reap_multiplier 0x0000000000754908|0x0000000000000008|OBJT |GLOB |0x0 |30 |multiticks_reap_threshold
Do I need to enable something on the client side (the Linux guest)? I do get IO-APIC panics when the KVM HPET is used. I traced all system calls from qemu-kvm:
4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C788) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C7B0) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C7D8) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C800) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C828) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C850) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C878) = 0 4461: timer_create(4, 0xFFFFFD7FFFDFF6E0, 0x00F8C8A0) = 0
The clockid_t argument to timer_create is 4, which means it's using CLOCK_MONOTONIC (i.e. CLOCK_HIGHRES).
http://src.illumos.org/source/xref/illumos-gate/usr/src/uts/common/sys/time_impl.h#126
So at least we know CLOCK_HIGHRES is used here.
OK, I figured out that the issue was an old guest kernel version. I'm unable to reproduce it on the latest guest kernel version (I don't use the -no-hpet option now).
Host machines: oi_151a, oi_151a2. Both kvm and kvm-cmd are built by hand from the latest version on GitHub.
When multiple guests are booted at the same time, some of them hang.
The VMs are Linux hosts.
mpstat 1 3
CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 97 0 24 3821 1847 1499 1 38 464 0 813 0 11 0 89 1 56 0 61 2192 1054 1502 3 33 441 0 1224 0 20 0 79 2 43 0 24 181 19 1434 2 22 600 0 469 0 7 0 93 3 40 0 13 227 28 1215 2 24 674 0 913 0 22 0 78 4 97 0 47 199 17 1097 1 23 226 0 784 0 3 0 97 5 41 0 40 204 22 999 1 20 138 0 1180 0 4 0 96 6 50 0 41 1819 912 1535 2 17 222 0 764 0 4 0 96 7 94 0 96 3759 1842 1828 3 22 202 0 904 0 6 0 94 8 26 0 22 469 156 1646 1 24 450 0 442 0 9 0 91 9 24 0 11 190 19 741 1 17 222 0 497 0 9 0 90 10 30 0 33 1789 865 1667 2 19 993 0 397 0 10 0 90 11 19 0 15 163 17 977 1 13 291 0 270 0 8 0 92 12 56 0 59 1069 482 1678 2 21 186 0 1117 0 7 0 93 13 34 0 68 2327 1127 1770 2 17 468 0 310 0 6 0 94 14 39 0 60 2461 1186 1685 2 19 209 0 651 0 4 0 96 15 73 0 41 190 21 958 1 16 127 0 333 0 2 0 98 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 0 2399 1147 135 0 18 9 0 347 0 0 0 100 1 0 0 0 4 0 0 3 0 0 0 3 0 100 0 0 2 0 0 0 113 10 167 0 6 1 0 216 1 0 0 99 3 0 0 0 64 14 2051 4 4 301 0 121 0 1 0 99 4 21 0 1 128 9 211 2 9 2 0 336 0 0 0 100 5 0 0 4 74 14 1636 2 16 210 0 97 0 33 0 67 6 0 0 5 3979 1982 3300 2 15 174 0 44 0 3 0 97 7 0 0 4 2177 1075 2321 3 20 98 0 730 0 2 0 98 8 0 0 0 2006 1000 0 4 0 0 0 4 0 100 0 0 9 0 0 0 44 14 22 0 1 0 0 95 0 0 0 100 10 0 0 0 100 7 2273 0 21 286 0 315 0 1 0 99 11 0 0 0 223 72 146 0 3 19 0 665 0 0 0 100 12 0 0 5 2078 1025 3248 6 25 242 0 342 1 2 0 97 13 0 0 2 4076 2031 1740 1 15 31 0 51 0 14 0 86 14 0 0 2 83 19 72 6 12 1 0 73 0 55 0 45 15 0 0 0 64 14 50 0 11 3 0 18 0 0 0 100 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 0 2413 1157 183 0 15 20 0 662 1 1 0 98 1 0 0 0 5 1 0 3 0 0 0 3 0 100 0 0 2 0 0 0 100 2 158 0 2 0 0 142 0 0 0 100 3 0 0 0 33 2 1982 3 0 257 0 5 0 2 0 98 4 0 0 0 80 20 50 0 7 0 0 153 0 1 0 99 5 0 0 1 90 31 4798 2 13 637 0 620 0 4 0 96 6 0 0 1 4009 2001 208 0 10 10 0 10 0 2 0 98 7 8 0 1 2098 1034 2505 8 19 158 0 652 0 2 0 98 8 0 0 0 
2004 1000 0 3 0 0 0 3 0 100 0 0 9 0 0 0 14 6 8 0 1 0 0 0 0 0 0 100 10 0 0 0 115 4 2310 0 8 353 0 298 0 2 0 98 11 0 0 0 217 72 144 0 2 11 0 665 0 0 0 100 12 0 0 2 2047 1010 4036 2 14 52 0 125 0 2 0 98 13 0 0 3 4079 2031 871 3 15 23 0 76 0 1 0 99 14 0 0 0 18 10 0 4 0 0 0 4 0 100 0 0 15 0 0 0 103 40 99 0 14 1 0 31 1 0 0 99
PID USERNAME USR SYS TRP TFL DFL LCK SLP LAT VCX ICX SCL SIG PROCESS/LWPID 2317 root 0.0 100 0.0 0.0 0.0 0.0 0.0 0.0 0 18 18 0 qemu-kvm-sys/6 2317 root 0.0 100 0.0 0.0 0.0 0.0 0.0 0.0 0 16 16 0 qemu-kvm-sys/5 2317 root 0.0 100 0.0 0.0 0.0 0.0 0.0 0.0 0 16 16 0 qemu-kvm-sys/4 2320 root 0.0 3.0 0.0 0.0 0.0 0.0 97 0.4 9K 15 917 6 qemu-kvm-sys/3 2323 root 0.0 2.5 0.0 0.0 0.0 0.0 97 0.4 9K 2 1K 0 qemu-kvm-sys/3 2317 root 0.0 1.5 0.0 0.0 0.0 0.0 98 0.4 9K 17 17 0 qemu-kvm-sys/3
pstack 2317
2317: qemu-kvm -enable-kvm -nographic -smp 4 -m 12288 -vnc 0.0.0.0:51 -no-hp ----------------- lwp# 1 / thread# 1 -------------------- fffffd7fff2cfeda pollsys (fffffd7fffdf9650, 4, fffffd7fffdf9760, 0) fffffd7fff25c154 pselect () + 18c fffffd7fff25c668 select () + 70 00000000004bcf1f main_loop_wait () + 19f 00000000004cf70b kvm_main_loop () + bb 00000000004be406 main () + 9a6 00000000004b031c _start () + 6c ----------------- lwp# 2 / thread# 2 -------------------- fffffd7fff2cf54a sigtimedwait (f75cc0, fffffd7ffee9ecb0, 0) fffffd7fff2b8704 sigwaitinfo () + c 00000000004db088 sigwait_compat () + 48 fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start () ----------------- lwp# 3 / thread# 3 -------------------- fffffd7fff2cfc5a ioctl (b, 2000ae80, 0) 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start () ----------------- lwp# 4 / thread# 4 -------------------- fffffd7fff2cfc5a ioctl () + a 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start () ----------------- lwp# 5 / thread# 5 -------------------- fffffd7fff2cfc5a ioctl () + a 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start () ----------------- lwp# 6 / thread# 6 -------------------- fffffd7fff2cfc5a ioctl () + a 00000000004cecd9 kvm_cpu_exec () + 9 00000000004cf33a ap_main_loop () + 22a fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start () ----------------- lwp# 7 / thread# 7 -------------------- fffffd7fff2cf54a sigtimedwait (3011f4530, fffffd7ffe162cb0, 0) fffffd7fff2b8704 sigwaitinfo () + c 00000000004db088 sigwait_compat () + 48 fffffd7fff2c6fd4 _thrp_setup () + bc fffffd7fff2c72a0 _lwp_start ()