X16Community / x16-emulator

Emulator for the Commander X16 8-bit computer
BSD 2-Clause "Simplified" License
205 stars 43 forks source link

When FX One Byte Fill and Multiplication flags are set, a write doesn't trigger the multiplication #275

Closed Yazwh0 closed 5 months ago

Yazwh0 commented 5 months ago

The byte cycling check in video.c looks suspect to me.

            if (fx_cache_write) {
                address &= 0x1fffc;
                if (fx_cache_byte_cycling) {
                    fx_vram_cache_write(address+0, fx_cache[fx_cache_byte_index], value & 0x03);
                    fx_vram_cache_write(address+1, fx_cache[fx_cache_byte_index], (value >> 2) & 0x03);
                    fx_vram_cache_write(address+2, fx_cache[fx_cache_byte_index], (value >> 4) & 0x03);
                    fx_vram_cache_write(address+3, fx_cache[fx_cache_byte_index], value >> 6);
                } else {
                    if (fx_multiplier) {
                        int32_t m_result = (int16_t)((fx_cache[1] << 8) | fx_cache[0]) * (int16_t)((fx_cache[3] << 8) | fx_cache[2]);
                        if (fx_subtract)
                            m_result = fx_mult_accumulator - m_result;
                        else
                            m_result = fx_mult_accumulator + m_result;
                        fx_vram_cache_write(address+0, (m_result) & 0xff, value & 0x03);
                        fx_vram_cache_write(address+1, (m_result >> 8) & 0xff, (value >> 2) & 0x03);
                        fx_vram_cache_write(address+2, (m_result >> 16) & 0xff, (value >> 4) & 0x03);
                        fx_vram_cache_write(address+3, (m_result >> 24) & 0xff, value >> 6);
                    } else {
                        fx_vram_cache_write(address+0, fx_cache[0], value & 0x03);
                        fx_vram_cache_write(address+1, fx_cache[1], (value >> 2) & 0x03);
                        fx_vram_cache_write(address+2, fx_cache[2], (value >> 4) & 0x03);
                        fx_vram_cache_write(address+3, fx_cache[3], value >> 6);
                    }
                }

Test app:

import BM="bm.bmasm";

.machine CommanderX16R42
    BM.X16Header();

    php
    sei

    lda CTRL
    pha

    stz CTRL

    lda ADDRx_L
    pha
    lda ADDRx_M
    pha
    lda ADDRx_H
    pha

    stz ADDRx_L
    stz ADDRx_M
    lda #$10
    sta ADDRx_H

    ; store some known data and clear a bit fo vram
    lda #01
    sta DATA0
    inc
    sta DATA0
    inc
    sta DATA0
    inc
    sta DATA0

    stz DATA0
    stz DATA0
    stz DATA0
    stz DATA0
    stz DATA0
    stz DATA0

    lda #2 << 1
    sta CTRL

    lda #$20
    sta FX_CTRL     ; Cache fill

    ; reset port
    stz CTRL
    stz ADDRx_L
    stz ADDRx_M
    lda #$10
    sta ADDRx_H

    lda DATA0
    lda DATA0
    lda DATA0
    lda DATA0       ; cache now 04 03 02 01, or 0x102, 0x304

    lda #2 << 1
    sta CTRL

    ;lda #$00
    ;sta FX_CTRL     ; Turn off Cache fill

    ; ; reset port, but cache fill is still set
    ; stz CTRL
    ; stz ADDRx_L
    ; stz ADDRx_M
    ; lda #$01
    ; sta ADDRx_H

    lda #06 << 1
    sta CTRL

    lda FX_ACCUM_RESET  ; reset accumulator just in case

    lda #02 << 1
    sta CTRL

    lda #%01010000
    sta FX_CTRL         ; set cache write enable + 1byte fill STOPS MULTIPLICATION ON EMULATOR

    lda #%00010000      ; multiplier
    sta FX_MULT     ; $9F2C

    stz CTRL
    lda #04
    sta ADDRx_L
    stz ADDRx_M
    stz ADDRx_H

    stz DATA0           ; do multiplication and write cache, this happens on a write

    lda #$10
    sta ADDRx_H

    lda DATA0           ; should be 0x102 * 0x304 = 0x00030a08
    sta $400
    lda DATA0
    sta $401
    lda DATA0
    sta $402
    lda DATA0
    sta $403

    ; tidy up so we can return to basic.
    ;stp
    lda #06 << 1
    sta CTRL
    stz FX_CTRL

    lda #2 << 1
    sta CTRL
    stz FX_CTRL

    stz CTRL

    pla
    lda ADDRx_H
    pla
    lda ADDRx_M
    pla
    lda ADDRx_L

    pla
    sta CTRL

    plp

    rts

Emulator image

Hardware image

mooinglemur commented 5 months ago

I think the source you pasted is not current, but I believe the problem still exists.

mooinglemur commented 5 months ago

According to my reading of the verilog, the emulator behavior is what the VERA should be doing.

It looks like the code is saying if one byte cache cycling is on, the source should be the current index of the cache, otherwise pull it from either the cache or the multiplier/accum output depending on the state of the fx_multiplier toggle.

If that's not what it means, please help me understand.

        if (if0_one_byte_cache_cycling) begin
            if0_wrdata_to_use = if0_cache8;
        end else begin
            if0_wrdata_to_use = if0_wrdata;
        end

        if (if0_cache_write_enabled && !if0_one_byte_cache_cycling) begin
            // In cache write mode, we use the 32-bit data from the cache
            ram_wrdata = if0_mult_accum_cache32;
        end else begin
            // In non-cache write mode, we use the wrdata and duplicate to all four 8-bit channels
            ram_wrdata = {4{if0_wrdata_to_use}};
        end
Yazwh0 commented 5 months ago

I can only think I switched the sdcard over too quickly in testing, so ended up with an old version when I expected a new one?

I dunno. Appologies.