Closed — fengb closed this issue 4 years ago
const OpArg = union(enum) {
u8: u8,
u16: u16,
reg8: CpuReg8,
reg16: CpuReg16,
};
const Op = struct {
microop: fn(fd: *Fundude, arg0: OpArg, arg1: OpArg) OpResult,
arg0: OpArg,
arg1: OpArg,
op_len: u4,
zasm: []const u8,
};
const OpResult = struct {
jump: u16,
cycles: u16,
};
const LD = struct {
fn µ_rr_d8(arg0: OpArg, arg1: OpArg) OpResult {
return OpResult{ .jump = arg1.u16, .cycles = arg0.u8 };
}
pub fn rr_d8(arg0: u8, arg1: u16) Op {
return Op{
.microop = µ_rr_d8,
.arg0 = OpArg{ .u8 = arg0 },
.arg1 = OpArg{ .u16 = arg1 },
.zasm = "LD",
};
}
};
Might be worth deferring until https://github.com/ziglang/zig/issues/1717 for function expressions:
const ld__rr_d8 = fn(arg0: u8, arg1: u16) Op {
return Op{
.zasm = "LD",
.arg0 = OpArg{ .u8 = arg0 },
.arg1 = OpArg{ .u16 = arg1 },
.microop = fn(arg0: OpArg, arg1: OpArg) OpResult {
return OpResult{ .jump = arg1.u16, .cycles = arg0.u8 };
}
};
};
More CPU cache friendly layout:
const Op = struct {
// 2 bytes
.microop = enum { .ld__rr_d8, ... };
// 2 bytes
arg0: OpArg,
// 2 bytes
arg1: OpArg,
len: u8,
cycles: packed tuple { u4, u4 },
};
Thoughts:
Maybe try comptime magic similar to wazm:
pub fn decode(data: [*]u8) Op {
return switch (data[0]) {
0x06 => Op.init(.ld__rr_d8, Reg8.B, with8(inst)),
};
}
pub fn run(cpu: *main.Cpu, mmu: *main.Mmu, op: Op) Result {
// Microps should be a generated enum
const func = switch (op.microp) {
.ld__rr_d8 => ld__rr_d8,
};
// Casts should be noops due to packed struct magic.
const args = @typeInfo(@TypeOf(func)).Fn.args;
const arg0 = @bitCast(arg_types[0], op.arg0);
const arg1 = @bitCast(arg_types[1], op.arg1);
return @bitCast(Result, func(cpu, mmu, arg0, arg1));
}
pub fn ld__rr_d8(cpu: *main.Cpu, mmu: *main.Mmu, tgt: Arg.Reg8, d8: Arg.U8) Result.Fixed(1, 8) {
cpu.reg._8.set(tgt, d8);
return .{};
}
If we want to keep using microops, we cannot map 1-to-1 like I did with wazm, so we'll need the manual decode step. It might be worth investigating flattening the decode, but I'm not sure I want to go from ~80 microops to ~240 ops.
I like this direction. I also feel like there should be better standardization of type abbreviations (because I really don't remember the current ones):
r- — register
i- — immediate
R- — register-as-pointer
I- — immediate-as-pointer
-b — byte (8 bit)
-w — wide/word (16 bit)

With the separate decode step, we can finally get rid of the instruction offset hacks in all of the call ops and irqStep!
fn step() u16 {
const op = decode();
cpu.reg.PC += op.length;
const duration = run(op);
assert(duration == op.next_duration or duration == op.jump_duration);
return duration;
}
const Op.Result = extern struct {
duration,
fn Fixed(length: u8, duration: u8) type {
return extern struct {
const length = length;
const next_duration = duration;
const jump_duration = duration;
duration: u16 = duration,
};
}
fn Cond(length: u8, next_duration: u8, jump_duration: u8) type {
return extern struct {
const length = length;
const next_duration = next_duration;
const jump_duration = jump_duration;
duration: u16,
};
}
};
Extracted from https://github.com/fengb/fundude/issues/12, https://github.com/fengb/fundude/issues/16
We need a way to save decoded instructions.
Note: this will most likely hurt performance since we're converting the "step" jump table into a "decode" jump table + function pointer call. The followup cache should speed everything back up.