Closed Code-Hex closed 1 year ago
Guest disk information
$ df -h
Filesystem Size Used Avail Use% Mounted on
udev 952M 0 952M 0% /dev
tmpfs 197M 740K 197M 1% /run
/dev/vda 11G 3.0G 8.1G 27% /
tmpfs 985M 0 985M 0% /dev/shm
tmpfs 5.0M 0 5.0M 0% /run/lock
tmpfs 985M 0 985M 0% /sys/fs/cgroup
/dev/loop1 58M 58M 0 100% /snap/core20/1171
/dev/loop3 49M 49M 0 100% /snap/core18/2127
/dev/loop0 49M 49M 0 100% /snap/core18/2252
/dev/loop2 58M 58M 0 100% /snap/core20/1274
/dev/loop5 61M 61M 0 100% /snap/lxd/21544
/dev/loop4 38M 38M 0 100% /snap/snapd/14296
/dev/loop6 29M 29M 0 100% /snap/snapd/13643
/dev/loop7 61M 61M 0 100% /snap/lxd/21843
tmpfs 197M 0 197M 0% /run/user/0
current my opinion https://github.com/Code-Hex/vz/pull/12#issuecomment-992072181
I think this is caused by the thread between
libdispatch
(which is handled by Objective-C) and Go, or a problem on the VM side.
Would be worth building/running with go build -race ...
to see if the go runtime has anything to report. With go routines and macOS dispatch queues involved, it's going to be more or less required to protect these from concurrent access:
socket.go:var connectionHandlers = map[string]func(conn *VirtioSocketConnection, err error){}
socket.go:var shouldAcceptNewConnectionHandlers = map[unsafe.Pointer]func(conn *VirtioSocketConnection) bool{}
Had done something like this in an experimental branch which is now obsolete :) https://github.com/cfergeau/vz/commit/1aa2c8c36f59d1bc9125e30c0f972991977cd59e
I am also not able to close the VirtioSocketConnection
, close return nil, but io.Copy does not return.
func NewProxyVSOCK(p *Publish, vso *vz.VirtioSocketDevice, port uint32) func(conn net.Conn) {
return func(conn net.Conn) {
klog.Infof("[MAC_OS]prepare to connect to vm port: %d", port)
onConnect := func(vzconn *vz.VirtioSocketConnection, err error) {
defer Close(conn, "MAC_OS: unix --> vsock")
klog.Infof("[MAC_OS][%s]remote connection to vm established: %s",p,vzconn)
//defer Close(vzconn, "MAC_OS: vsock --> unix")
//stop := make(chan struct{}, 1)
grp := &sync.WaitGroup{}
grp.Add(2)
//go copyStream(grp,vzconn, conn, "MAC_OS: unix --> vsock",false)
go func() {
msg := "MAC_OS: unix --> vsock"
defer Close(conn, msg)
n, err := io.Copy(vzconn, conn)
if err != nil {
klog.Errorf("[%s]copy to vm: %d bytes copied, %s",msg, n, err.Error())
}
klog.Infof("local conn done")
grp.Done()
Close(vzconn,"")
}()
//go copyStream(nil,conn, vzconn, "MAC_OS: vsock --> unix",false)
go func() {
msg := "MAC_OS: vsock --> unix"
n, err := io.Copy(conn, vzconn)
if err != nil {
klog.Errorf("[%s]copy to vm: %d bytes copied, %s",msg, n, err.Error())
}
klog.Infof("vzconn: done")
grp.Done()
}()
klog.Infof("[MAC_OS][%s]unix --> vsock started, wait done: ", p)
grp.Wait()
klog.Infof("[MAC_OS][%s]receive goroutine sig 1",p)
//<-closer
}
if vso == nil {
klog.Errorf("[MAC_OS]Attention, nil vso: [%v]", vso)
}
klog.Infof("[MAC_OS] do connect to port")
vso.ConnectToPort(port, onConnect)
klog.Infof("[MAC_OS][%s]connection complete: %s",p, conn.RemoteAddr())
}
}
here is the output
podman login: I0113 21:34:45.277146 25120 publish.go:112] new connection:
I0113 21:34:45.277207 25120 proxy_darwin.go:16] [MAC_OS]prepare to connect to vm port: 2375
I0113 21:34:45.277216 25120 proxy_darwin.go:56] [MAC_OS] do connect to port
I0113 21:34:45.278198 25120 proxy_darwin.go:59] [MAC_OS][unix@/tmp/aoxn.sock:vsock@2375]connection complete:
I0113 21:34:45.279323 25120 proxy_darwin.go:19] [MAC_OS][unix@/tmp/aoxn.sock:vsock@2375]remote connection to vm established: &{c7g2ktdmk1u64864pt80 %!s(uint32=3283772544) %!s(uint32=2375) %!s(uintptr=10) %!s(*os.File=&{0xc0004ce120}) %!s(*vz.Addr=&{2 2375}) %!s(*vz.Addr=&{0 3283772544})}
I0113 21:34:45.279654 25120 proxy_darwin.go:48] [MAC_OS][unix@/tmp/aoxn.sock:vsock@2375]unix --> vsock started, wait done:
I0113 21:34:49.783454 25120 proxy_darwin.go:34] local conn done
I0113 21:34:49.783544 25120 publish.go:185] []close connection
I0113 21:34:49.783559 25120 publish.go:185] [MAC_OS: unix --> vsock]close connection
vzconn.Read() does not return after vzconn.Close().
the sequence to reproduce.
vzconn := Connect2Port()
grp := sync.WaitGroup{}
grp.Add(1)
go func() {
bb := make([]byte,1024)
n, err := vzconn.Read(bb)
if err != nil {
t.Logf("read error: %s", err.Error())
}
t.Logf("read %d byte: xxx", n)
grp.Done()
}()
t.Logf("sleep 5s")
time.Sleep(5*time.Second)
vzconn.Close()
t.Logf("sleep 3")
time.Sleep(3*time.Second)
t.Logf("wait")
grp.Wait()
t.Logf("finishe")
need to setNoneBlock mode for the descriptor.
func newVirtioSocketConnection(ptr unsafe.Pointer) *VirtioSocketConnection {
id := xid.New().String()
vzVirtioSocketConnection := C.convertVZVirtioSocketConnection2Flat(ptr)
err := unix.SetNonblock(int(vzVirtioSocketConnection.fileDescriptor), true)
if err != nil {
panic(fmt.Sprintf("set nonblock: %s", err.Error()))
}
conn := &VirtioSocketConnection{
id: id,
sourcePort: (uint32)(vzVirtioSocketConnection.sourcePort),
destinationPort: (uint32)(vzVirtioSocketConnection.destinationPort),
fileDescriptor: (uintptr)(vzVirtioSocketConnection.fileDescriptor),
file: os.NewFile((uintptr)(vzVirtioSocketConnection.fileDescriptor), id),
laddr: &Addr{
CID: unix.VMADDR_CID_HOST,
Port: (uint32)(vzVirtioSocketConnection.destinationPort),
},
raddr: &Addr{
CID: unix.VMADDR_CID_HYPERVISOR,
Port: (uint32)(vzVirtioSocketConnection.sourcePort),
},
}
return conn
}
fixed.
I don't know why need to setNoneBlock mode for the descriptor? could you tell me if possible. thanks relates: https://github.com/Code-Hex/vz/pull/23
golang lib use epoll to implement synchronized Read and Write. This requires a non-block socket. see golang implementation for more details, https://github.com/golang/go/blob/4f04e1d99fac7abf067b6bd3a299f1fbc9a59414/src/net/sock_cloexec.go#L45
I don't think non blocking is related
@Code-Hex Sorry for the miss-leading, #23 does not intended to solve #13, it fix the problem of copyStream blocks on reading forever.
issue #24 might related with the crash scenario. remove code runtime.SetFinalizer(socketDevice, func(self *VirtioSocketDevice) { self.Release() })
seems fixed.
Guest Side
Linux km-silicon 5.4.0-84-generic #94-Ubuntu SMP Thu Aug 26 19:28:28 UTC 2021 aarch64 aarch64 aarch64 GNU/Linux
Clash log
``` root@km-silicon:~# ./cli.sh 2021/12/13 02:23:23 socat[980] E connect(5, AF=40 cid:2 port:8321, 16): Connection reset by peer 2021/12/13 02:23:24 socat[984] E connect(5, AF=40 cid:2 port:8321, 16): Connection reset by peer 2021/12/13 02:23:25 socat[987] E connect(5, AF=40 cid:2 port:8321, 16): Connection reset by peer [ 102.328859] kernel BUG at arch/arm64/kernel/traps.c:405! [ 102.329110] Internal error: Oops - BUG: 0 [#1] SMP [ 102.329222] Modules linked in: dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua bridge stp llc vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common vsock virtio_rng uio_pdrv_genirq uio sch_fq_codel drm ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor xor_neon raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_ce ghash_ce sha3_ce sha3_generic sha512_ce sha512_arm64 sha2_ce sha256_arm64 sha1_ce virtio_blk virtio_net net_failover failover aes_neon_bs aes_neon_blk aes_ce_blk crypto_simd cryptd aes_ce_cipher [ 102.330499] CPU: 0 PID: 352 Comm: systemd-journal Not tainted 5.4.0-84-generic #94-Ubuntu [ 102.330606] pstate: 00400005 (nzcv daif +PAN -UAO) [ 102.330733] pc : do_undefinstr+0x68/0x70 [ 102.330788] lr : do_undefinstr+0x3c/0x70 [ 102.330836] sp : ffff80001203bbc0 [ 102.330883] x29: ffff80001203bbc0 x28: ffff0000a9b0cb00 [ 102.330959] x27: 0000000000000000 x26: 0000000000000000 [ 102.331030] x25: 000000007fff0000 x24: 0000000000000000 [ 102.331104] x23: 0000000040400005 x22: ffff80000002517c [ 102.331180] x21: ffff80001203bd20 x20: 0000ffffffffffff [ 102.331249] x19: ffff80001203bbe0 x18: 0000000000000000 [ 102.331322] x17: 0000000000000000 x16: 0000000000000000 [ 102.331400] x15: 0000000000000000 x14: 0000000000000000 [ 102.331469] x13: 0000000000000000 x12: 0000000000000000 [ 102.331537] x11: 0000000000000000 x10: 0000000000000000 [ 102.331608] x9 : 0000000040000040 x8 : 0000ffffc27c05e0 [ 102.331714] x7 : 0000ffffab6345a8 x6 : ffff80001203bbb8 [ 102.331788] x5 : 0000000000000000 x4 : 0000000000000000 [ 102.331857] x3 : ffff800011b85a78 x2 : 0000000000000000 [ 102.331930] x1 : 0000000000000000 x0 : 0000000040400005 [ 102.332005] Call trace: [ 102.332044] do_undefinstr+0x68/0x70 [ 102.332125] el1_undef+0x10/0xb4 [ 102.332239] 0xffff80000002517c [ 102.332320] __secure_computing+0x50/0xe0 [ 102.332389] syscall_trace_enter+0x184/0x1c0 [ 102.332463] el0_svc_common.constprop.0+0x70/0x1f0 [ 102.332536] el0_svc_handler+0x38/0xa8 [ 102.332602] el0_svc+0x10/0x2c8 [ 102.332671] Code: 94005368 f9400bf3 a8c27bfd d65f03c0 (d4210000) [ 102.332850] ---[ end trace 82c01dc59ca40dcd ]--- [ 102.368087] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1. [ 102.388590] systemd[1]: Stopping Flush Journal to Persistent Storage... [ 102.403682] systemd[1]: motd-news.service: Succeeded. [ 102.404700] systemd[1]: Finished Message of the Day. [ 102.406742] systemd[1]: systemd-journal-flush.service: Succeeded. [ 102.407392] systemd[1]: Stopped Flush Journal to Persistent Storage. [ 102.407667] systemd[1]: Stopped Journal Service. [ 102.411246] systemd[1]: Starting Journal Service... [ 102.455216] ------------[ cut here ]------------ [ 102.455294] WARNING: CPU: 0 PID: 0 at kernel/rcu/tree.c:569 rcu_idle_enter+0x74/0x80 [ 102.455355] Modules linked in: dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua bridge stp llc vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common vsock virtio_rng uio_pdrv_genirq uio sch_fq_codel drm ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor xor_neon raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_ce ghash_ce sha3_ce sha3_generic sha512_ce sha512_arm64 sha2_ce sha256_arm64 sha1_ce virtio_blk virtio_net net_failover failover aes_neon_bs aes_neon_blk aes_ce_blk crypto_simd cryptd aes_ce_cipher [ 102.455747] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G D 5.4.0-84-generic #94-Ubuntu [ 102.455820] pstate: 20c00085 (nzCv daIf +PAN +UAO) [ 102.455857] pc : rcu_idle_enter+0x74/0x80 [ 102.455897] lr : do_idle+0x210/0x298 [ 102.455921] sp : ffff800011b73ed0 [ 102.455941] x29: ffff800011b73ed0 x28: 00000000714b0018 [ 102.455983] x27: 0000000000000000 x26: 0000000000000000 [ 102.456003] x25: 0000000000000000 x24: ffff800011b79454 [ 102.456028] x23: ffff800011b82e40 x22: ffff800011691838 [ 102.456055] x21: ffff800011b79418 x20: 0000000000000000 [ 102.456092] x19: ffff800011b79338 x18: 0000000000000000 [ 102.456138] x17: 0000000000000000 x16: 0000000000000000 [ 102.456167] x15: 0000000000000000 x14: 0000000000000000 [ 102.456210] x13: 0000000000000000 x12: 0000000000000001 [ 102.456238] x11: ffff800011258368 x10: 0000000000000ad0 [ 102.456273] x9 : ffff800011b73e00 x8 : ffff800011b83970 [ 102.456310] x7 : 0000000000000000 x6 : 000051d67bbdf899 [ 102.456336] x5 : 00ffffffffffffff x4 : 0000000000001ae2 [ 102.456371] x3 : 4000000000000002 x2 : 4000000000000000 [ 102.456407] x1 : ffff800011693b00 x0 : ffff0000adbe6b00 [ 102.456432] Call trace: [ 102.456444] rcu_idle_enter+0x74/0x80 [ 102.456489] do_idle+0x210/0x298 [ 102.456538] cpu_startup_entry+0x30/0xb8 [ 102.456587] rest_init+0xc0/0xcc [ 102.456639] arch_call_rest_init+0x18/0x20 [ 102.456671] start_kernel+0x4cc/0x500 [ 102.456689] ---[ end trace 82c01dc59ca40dce ]--- [ 102.536777] systemd-journald[1240]: File /var/log/journal/4c97dcebbda44c73a9830202169730e5/system.journal corrupted or uncleanly shut down, renaming and replacing. [ 102.629417] systemd[1]: Started Journal Service. [ 102.638990] systemd-journald[1240]: Received client request to flush runtime journal. zsh: segmentation fault ./virtualization ```Host Side
Clash log
I've gotten by `$ cat ~/Library/Logs/DiagnosticReports/virtualization_2021-12-13-113904_mac-mini.crash | pbcopy` ``` Process: virtualization [18487] Path: /Users/USER/*/virtualization Identifier: virtualization Version: 0 Code Type: ARM-64 (Native) Parent Process: zsh [15217] User ID: 501 Date/Time: 2021-12-13 11:39:04.024 +0900 OS Version: macOS 11.6 (20G165) Report Version: 12 Anonymous UUID: 1C28D546-716A-F6B7-A889-1A3819FB7AAC Sleep/Wake UUID: 725F0E6B-3962-4620-86C6-FCB3D8C9A235 Time Awake Since Boot: 3700000 seconds Time Since Wake: 1700 seconds System Integrity Protection: enabled Crashed Thread: 0 Dispatch queue: com.apple.main-thread Exception Type: EXC_CRASH (SIGSEGV) Exception Codes: 0x0000000000000000, 0x0000000000000000 Exception Note: EXC_CORPSE_NOTIFY Termination Signal: Segmentation fault: 11 Termination Reason: Namespace SIGNAL, Code 0xb Terminating Process: virtualization [18487] Thread 0 Crashed:: Dispatch queue: com.apple.main-thread 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 1: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdd8 _pthread_cond_wait + 1292 2 virtualization 0x00000001041a816c runtime.pthread_cond_timedwait_relative_np_trampoline + 28 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 2: 0 libsystem_kernel.dylib 0x000000018cfe59bc read + 8 1 virtualization 0x00000001041a799c runtime.read_trampoline + 28 Thread 3: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 4: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 5: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 6: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 7: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 8: 0 libsystem_pthread.dylib 0x000000018d01a5cc start_wqthread + 0 Thread 9:: Dispatch queue: c6rb3g5mmud4gdpjo080 0 libsystem_kernel.dylib 0x000000018cfe8344 __semwait_signal + 8 1 libsystem_c.dylib 0x000000018cf64284 nanosleep + 216 2 libsystem_c.dylib 0x000000018cf64148 usleep + 68 3 virtualization 0x00000001041a7d84 runtime.usleep_trampoline + 20 4 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 10: 0 libsystem_kernel.dylib 0x000000018cfe8548 __psynch_cvwait + 8 1 libsystem_pthread.dylib 0x000000018d01fdac _pthread_cond_wait + 1248 2 virtualization 0x00000001041a8138 runtime.pthread_cond_wait_trampoline + 24 3 virtualization 0x00000001041a6ec8 runtime.asmcgocall + 200 Thread 0 crashed with ARM Thread State (64-bit): x0: 0x0000000000000104 x1: 0x0000000000000000 x2: 0x0000000000000300 x3: 0x0000000000000000 x4: 0x0000000000000000 x5: 0x00000000000000a0 x6: 0x0000000000000000 x7: 0x0000000000000000 x8: 0x000000016bcbe858 x9: 0x0000000000000301 x10: 0x00000001042dbd78 x11: 0x0000010000000102 x12: 0x0000000000000100 x13: 0x0000000000000000 x14: 0x0000000000000100 x15: 0x0000000000000001 x16: 0x0000000000000131 x17: 0x00000001fbaa6e48 x18: 0x0000000000000000 x19: 0x00000001042dbd60 x20: 0x00000001042dbda0 x21: 0x0000000104537e20 x22: 0x0000000000000000 x23: 0x0000000000000000 x24: 0x0000000000000300 x25: 0x0000000000000301 x26: 0x0000000000000400 x27: 0x0000000000000020 x28: 0x00000001042db620 fp: 0x000000016bcbe8d0 lr: 0x000000018d01fdac sp: 0x000000016bcbe840 pc: 0x000000018cfe8548 cpsr: 0x60000000 far: 0x000000016811b6e8 esr: 0x56000080 Binary Images: 0x104140000 - 0x10421bfff +virtualization (0)