Closed Martins3 closed 3 years ago
vcpu
has the same content as the last one?
kvm_arch_vcpu_create
static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu)
{
int i;
for_each_possible_cpu(i) vcpu->arch.vzguestid[i] = 0;
return 0; }
If CLONE_FILES is set, the calling process and the child
process share the same file descriptor table. Any file
descriptor created by the calling process or by the child
process is also valid in the other process. Similarly, if
one of the processes closes a file descriptor, or changes
its associated flags (using the fcntl(2) F_SETFD
operation), the other process is also affected. If a
process sharing a file descriptor table calls execve(2),
its file descriptor table is duplicated (unshared).
If CLONE_FILES is not set, the child process inherits a
copy of all file descriptors opened in the calling process
at the time of the clone call. Subsequent operations that
open or close file descriptors, or change file descriptor
flags, performed by either the calling process or the
child process do not affect the other process. Note,
however, that the duplicated file descriptors in the child
refer to the same open file descriptions as the
corresponding file descriptors in the calling process, and
thus share file offsets and file status flags (see
open(2)).
[x] it seems the host stack has to be copied manually?
[x] recheck the flags of mmap(2): are mappings shared or not shared between the child and parent process?
[x] Do I have to copy simd_regs and fpu_regs?
simd_regs
and fpu_regs
[x] How to initialize simd_regs and fpu_regs?
[x] What are the capabilities of get_regs and set_regs?
[ ] get these special syscalls: /home/maritns3/core/linux/arch/mips/kernel/syscalls/syscall_n64.tbl
cpu
in host_loop
/home/maritns3/core/loongson-dune/cross/arch/mips/kernel/scall64-64.S
syscall_common:
dsubu t2, v0, __NR_64_Linux
sltiu t0, t2, __NR_64_Linux_syscalls + 1
beqz t0, illegal_syscall
dsll t0, t2, 3 # offset into table
dla t2, sys_call_table
daddu t0, t2, t0
ld t2, (t0) # syscall routine
beqz t2, illegal_syscall
jalr t2 # Do The Real Thing (TM)
li t0, -EMAXERRNO - 1 # error?
# #define EMAXERRNO 1133
sltu t0, t0, v0
# To record the result of an unsigned less-than comparison.
# Description: GPR[rd] <- (GPR[rs] < GPR[rt])
sd t0, PT_R7(sp) # set error flag
beqz t0, 1f
ld t1, PT_R2(sp) # syscall number
dnegu v0 # error
sd t1, PT_R0(sp) # save it for syscall restarting
1: sd v0, PT_R2(sp) # result
n64_syscall_exit:
j syscall_exit_partial
>>> disass 0x1200095b4
Dump of assembler code for function __syscall0:
0x00000001200095b0 <+0>: daddu v0,zero,a0
0x00000001200095b4 <+4>: syscall
0x00000001200095b8 <+8>: beqz a3,0x1200095cc <__syscall0+28>
0x00000001200095bc <+12>: nop
0x00000001200095c0 <+16>: blez v0,0x1200095cc <__syscall0+28>
0x00000001200095c4 <+20>: nop
0x00000001200095c8 <+24>: dnegu v0,v0
0x00000001200095cc <+28>: jr ra
0x00000001200095d0 <+32>: nop
End of assembler dump.
copy_cp0(parent_cpu, child_cpu, KVM_REG_MIPS_CP0_EPC);
>>> disass 0x120009a94
Dump of assembler code for function __stdio_write:
0x0000000120009a20 <+0>: daddiu sp,sp,-96
0x0000000120009a24 <+4>: sd gp,80(sp)
0x0000000120009a28 <+8>: lui gp,0x2
0x0000000120009a2c <+12>: ld v1,56(a0)
0x0000000120009a30 <+16>: sd s2,48(sp)
0x0000000120009a34 <+20>: daddiu gp,gp,-1952
0x0000000120009a38 <+24>: ld s2,40(a0)
0x0000000120009a3c <+28>: daddu gp,gp,t9
0x0000000120009a40 <+32>: dsubu s2,s2,v1
0x0000000120009a44 <+36>: sd s5,72(sp)
0x0000000120009a48 <+40>: ld s5,-32160(gp)
0x0000000120009a4c <+44>: sd s4,64(sp)
0x0000000120009a50 <+48>: sd s3,56(sp)
0x0000000120009a54 <+52>: sd s1,40(sp)
0x0000000120009a58 <+56>: sd s0,32(sp)
0x0000000120009a5c <+60>: sd s2,8(sp)
0x0000000120009a60 <+64>: sd ra,88(sp)
0x0000000120009a64 <+68>: move s0,a0
0x0000000120009a68 <+72>: move s4,a2
0x0000000120009a6c <+76>: sd v1,0(sp)
0x0000000120009a70 <+80>: sd a1,16(sp)
0x0000000120009a74 <+84>: sd a2,24(sp)
0x0000000120009a78 <+88>: daddu s2,s2,a2
0x0000000120009a7c <+92>: li s3,2
0x0000000120009a80 <+96>: move s1,sp
0x0000000120009a84 <+100>: lw a0,120(s0)
0x0000000120009a88 <+104>: move a1,s1
0x0000000120009a8c <+108>: move a2,s3
0x0000000120009a90 <+112>: daddiu v0,zero,5019
0x0000000120009a94 <+116>: syscall
0x0000000120009a98 <+120>: beqz a3,0x120009aac <__stdio_write+140>
0x0000000120009a9c <+124>: move a0,v0
0x0000000120009aa0 <+128>: blez v0,0x120009aac <__stdio_write+140>
0x0000000120009aa4 <+132>: nop
0x0000000120009aa8 <+136>: dnegu a0,v0
0x0000000120009aac <+140>: move t9,s5
0x0000000120009ab0 <+144>: bal 0x120004eb0 <__syscall_ret>
0x0000000120009ab4 <+148>: nop
0x0000000120009ab8 <+152>: bne v0,s2,0x120009b04 <__stdio_write+228>
0x0000000120009abc <+156>: move v1,v0
0x0000000120009ac0 <+160>: ld v0,88(s0)
0x0000000120009ac4 <+164>: ld v1,96(s0)
0x0000000120009ac8 <+168>: sd v0,56(s0)
0x0000000120009acc <+172>: daddu v1,v0,v1
0x0000000120009ad0 <+176>: sd v1,32(s0)
0x0000000120009ad4 <+180>: sd v0,40(s0)
0x0000000120009ad8 <+184>: ld ra,88(sp)
0x0000000120009adc <+188>: move v0,s4
0x0000000120009ae0 <+192>: ld gp,80(sp)
0x0000000120009ae4 <+196>: ld s5,72(sp)
0x0000000120009ae8 <+200>: ld s4,64(sp)
0x0000000120009aec <+204>: ld s3,56(sp)
0x0000000120009af0 <+208>: ld s2,48(sp)
0x0000000120009af4 <+212>: ld s1,40(sp)
0x0000000120009af8 <+216>: ld s0,32(sp)
0x0000000120009afc <+220>: jr ra
0x0000000120009b00 <+224>: daddiu sp,sp,96
0x0000000120009b04 <+228>: bgez v0,0x120009b38 <__stdio_write+280>
0x0000000120009b08 <+232>: nop
0x0000000120009b0c <+236>: lw v0,0(s0)
0x0000000120009b10 <+240>: sd zero,32(s0)
0x0000000120009b14 <+244>: ori v0,v0,0x20
0x0000000120009b18 <+248>: sw v0,0(s0)
0x0000000120009b1c <+252>: li v0,2
0x0000000120009b20 <+256>: sd zero,56(s0)
0x0000000120009b24 <+260>: beq s3,v0,0x120009b70 <__stdio_write+336>
0x0000000120009b28 <+264>: sd zero,40(s0)
0x0000000120009b2c <+268>: ld v0,8(s1)
0x0000000120009b30 <+272>: b 0x120009ad8 <__stdio_write+184>
0x0000000120009b34 <+276>: dsubu s4,s4,v0
0x0000000120009b38 <+280>: ld a0,8(s1)
0x0000000120009b3c <+284>: sltu a1,a0,v0
0x0000000120009b40 <+288>: beqz a1,0x120009b54 <__stdio_write+308>
0x0000000120009b44 <+292>: dsubu s2,s2,v0
0x0000000120009b48 <+296>: dsubu v1,v0,a0
0x0000000120009b4c <+300>: daddiu s1,s1,16
0x0000000120009b50 <+304>: addiu s3,s3,-1
0x0000000120009b54 <+308>: ld a0,0(s1)
0x0000000120009b58 <+312>: daddu a0,a0,v1
0x0000000120009b5c <+316>: sd a0,0(s1)
0x0000000120009b60 <+320>: ld a0,8(s1)
0x0000000120009b64 <+324>: dsubu v1,a0,v1
0x0000000120009b68 <+328>: b 0x120009a84 <__stdio_write+100>
0x0000000120009b6c <+332>: sd v1,8(s1)
0x0000000120009b70 <+336>: b 0x120009ad8 <__stdio_write+184>
0x0000000120009b74 <+340>: move s4,zero
End of assembler dump.
History: #0
Commit: 6d4e4c4fca5be806b888d606894d914847e82d78
Author: Avi Kivity <avi@qumranet.com>
Author Date: Wed 21 Nov 2007 10:41:05 PM CST
Committer Date: Wed 30 Jan 2008 11:53:13 PM CST
KVM: Disallow fork() and similar games when using a VM
We don't want the meaning of guest userspace changing under our feet.
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index aec6b67cfebb..0efd759e585f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -165,6 +165,8 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
+ kvm->mm = current->mm;
+ atomic_inc(&kvm->mm->mm_count);
kvm_io_bus_init(&kvm->pio_bus);
mutex_init(&kvm->lock);
kvm_io_bus_init(&kvm->mmio_bus);
The child should be exactly the same: it points to the instruction following the syscall.
>>> disass 0x120009098 (<----------- this is parent epc)
Dump of assembler code for function __clone:
0x0000000120009070 <+0>: li at,-16
0x0000000120009074 <+4>: and a1,a1,at
0x0000000120009078 <+8>: daddiu a1,a1,-16
0x000000012000907c <+12>: sd a0,0(a1)
0x0000000120009080 <+16>: sd a3,8(a1)
0x0000000120009084 <+20>: move a0,a2
0x0000000120009088 <+24>: move a2,a4
0x000000012000908c <+28>: move a3,a5
0x0000000120009090 <+32>: move a4,a6
0x0000000120009094 <+36>: li v0,5055
0x0000000120009098 <+40>: syscall
0x000000012000909c <+44>: beqz a3,0x1200090ac <__clone+60>
0x00000001200090a0 <+48>: nop
0x00000001200090a4 <+52>: jr ra
0x00000001200090a8 <+56>: dnegu v0,v0
0x00000001200090ac <+60>: beqz v0,0x1200090bc <__clone+76>
0x00000001200090b0 <+64>: nop
0x00000001200090b4 <+68>: jr ra
0x00000001200090b8 <+72>: nop
0x00000001200090bc <+76>: ld t9,0(sp)
0x00000001200090c0 <+80>: ld a0,8(sp)
0x00000001200090c4 <+84>: jalr t9
0x00000001200090c8 <+88>: nop
0x00000001200090cc <+92>: move a0,v0
0x00000001200090d0 <+96>: li v0,5058
0x00000001200090d4 <+100>: syscall
0x00000001200090d8 <+104>: nop
0x00000001200090dc <+108>: nop
End of assembler dump.
EXCCODE_INT
in kvm_mips_handle_exit? Here is my assumption:
My idea for the bug: maybe it's caused by a remote TLB shootdown:
No, a remote TLB shootdown, which will call kvm_vz_local_flush_roottlb_all_guests
for_each_possible_cpu(i)
vcpu->arch.vzguestid[i] = 0;
[99132.196899] kvm [739]: kvm_mips_handle_exit: cause: 0x10800008, PC: 120008f4c, kvm_run: 000000001d0d7069, kvm_vcpu: 000000006df3ba79 cause=40008008
[99132.196902] kvm [739]: TLB LD fault: cause 0x10800008, PC: 000000008da9c0d9, BadVaddr: 0xfffbcf30b0
[99132.196912] kvm [739]: kvm_vz_host_tlb_inv: Invalidated root entryhi 0xfffbcf2000 @ idx 2014
[99132.196914] kvm [739]: kvm_mips_handle_exit : cause register = 40008008 exit_reason=0
[99132.196918] kvm [739]: kvm_mips_handle_exit: cause: 0x10800008, PC: fffbcf30f0, kvm_run: 000000001d0d7069, kvm_vcpu: 000000006df3ba79 cause=40008008
[99132.196921] kvm [739]: TLB LD fault: cause 0x10800008, PC: 000000008daf6e2f, BadVaddr: 0x0
[99132.196923] kvm [739]: Failed to find VMA for hva 0x0
Reading symbols from clone01...done.
>>> disass 0x120008f4c
Dump of assembler code for function __clone:
0x0000000120008f00 <+0>: li at,-16
0x0000000120008f04 <+4>: and a1,a1,at
0x0000000120008f08 <+8>: daddiu a1,a1,-16
0x0000000120008f0c <+12>: sd a0,0(a1)
0x0000000120008f10 <+16>: sd a3,8(a1)
0x0000000120008f14 <+20>: move a0,a2
0x0000000120008f18 <+24>: move a2,a4
0x0000000120008f1c <+28>: move a3,a5
0x0000000120008f20 <+32>: move a4,a6
0x0000000120008f24 <+36>: li v0,5055
0x0000000120008f28 <+40>: syscall
0x0000000120008f2c <+44>: beqz a3,0x120008f3c <__clone+60>
0x0000000120008f30 <+48>: nop
0x0000000120008f34 <+52>: jr ra
0x0000000120008f38 <+56>: dnegu v0,v0
0x0000000120008f3c <+60>: beqz v0,0x120008f4c <__clone+76>
0x0000000120008f40 <+64>: nop
0x0000000120008f44 <+68>: jr ra
0x0000000120008f48 <+72>: nop
0x0000000120008f4c <+76>: ld t9,0(sp) ##### SP is not set correctly, so PC jumps to a random address #####
0x0000000120008f50 <+80>: ld a0,8(sp)
0x0000000120008f54 <+84>: jalr t9
0x0000000120008f58 <+88>: nop
0x0000000120008f5c <+92>: move a0,v0
0x0000000120008f60 <+96>: li v0,5058
0x0000000120008f64 <+100>: syscall
0x0000000120008f68 <+104>: nop
0x0000000120008f6c <+108>: nop
The child's sp will be set to the __child_stack parameter of the clone function, but our syscall simulation just copies the parent process's registers.
When I change host_loop to the original way, I get into trouble again.
[x] I know we subtract 16, but why doesn't musl?
$0 : 0000000000000000 fffffffffffffff0 00000000000013bf 000000fffbdd5e44
$4 : 0000000000000112 000000fff6d4fff0 0000000000002000 0000000000000064
$8 : 0000000000000036 0000000000000064 0000000000000036 000000000000000a
$12 : 000000006c696863 000000012000d8a0 ffffffffffffffff ffffffff81440000
$16 : 0000000000002e08 0000000000000001 000000fffbdd5ed8 000000012000151c
$20 : 000000fff4be9450 00000001201081d0 0000000000000000 000000fff4be94a0
$24 : 0000000000000004 00000001200092e0 9800000121a60138 0000000000000000
$28 : 00000001200282a0 000000fffbdd5de0 000000fffbdd5e20 0000000120005c28
hi : 0000000000000000
lo : 0000000000000000
pc : 980000fff6d581cc
Info: clone flags : 112
two vcpu
Info: kvm_alloc_vcpu begin
Info: kvm_alloc_vcpu get lock
Info: kvm_alloc_vcpu end 1
allocate vcpu 1
Registers:
----------
$0 : 0000000000000000 fffffffffffffff0 0000000000000000 000000fffbdd5e44
$4 : 0000000000000112 000000fff6d4fff0 0000000000002000 0000000000000000
$8 : 0000000000000036 0000000000000064 0000000000000036 000000000000000a
$12 : 000000006c696863 000000012000d8a0 ffffffffffffffff ffffffff81440000
$16 : 0000000000002e08 0000000000000001 000000fffbdd5ed8 000000012000151c
$20 : 000000fff4be9450 00000001201081d0 0000000000000000 000000fff4be94a0
$24 : 0000000000000004 00000001200092e0 9800000121a60138 0000000000000000
$28 : 00000001200282a0 000000fff6d4fff0 000000fffbdd5e20 0000000120005c28
hi : 0000000000000000
lo : 0000000000000000
pc : 980000fff6d581cc
/* save KScratch registers if enabled in guest */
if (cpu_guest_has_conf4) {
if (cpu_guest_has_kscr(2))
kvm_save_gc0_kscratch1(cop0);
if (cpu_guest_has_kscr(3))
kvm_save_gc0_kscratch2(cop0);
if (cpu_guest_has_kscr(4))
kvm_save_gc0_kscratch3(cop0);
if (cpu_guest_has_kscr(5))
kvm_save_gc0_kscratch4(cop0);
if (cpu_guest_has_kscr(6))
kvm_save_gc0_kscratch5(cop0);
if (cpu_guest_has_kscr(7))
kvm_save_gc0_kscratch6(cop0);
}