Memotech-Bill / PicoBB

BBC BASIC for Raspberry Pi Pico
zlib License
32 stars 4 forks source link

Assembler problems #2

Closed rtrussell closed 2 years ago

rtrussell commented 2 years ago

This instruction is reporting 'Too many parameters':

ADCS r0, r0, r3

The reference I am using for the Cortex-M0+ instruction set is here. I am assuming that the assembler is designed to accept 'unified syntax', not the old Thumb16 syntax which is deprecated.

Memotech-Bill commented 2 years ago

I had been working from the ARM v6M Architecture Reference Manual which has multiple, not always consistent, definitions of the syntax.

The code has now (hopefully) been extended to support all the alternate syntax options.

rtrussell commented 2 years ago

The code has now (hopefully) been extended to support all the alternate syntax options.

Thanks. Unfortunately I've noticed another anomaly; this instruction is being accepted and assembled, but it's not valid for the armv6-m (there's a 32-bit encoding but not a 16-bit encoding):

ADD r1, r2, r3

What seems to be happening is that it is being treated as synonymous with ADDS r1, r2, r3 but it's not (ADDS affects the flags, ADD doesn't).

Memotech-Bill commented 2 years ago

I have been following what the GNU assembler does and if unspecified emit the sign affecting instruction if that is the only one available.

    adc r0, r1
   0:   4148        adcs    r0, r1
    add r2, r3, # 4
   2:   1d1a        adds    r2, r3, # 4
    add r5, # 6
   4:   3506        adds    r5, # 6
    add r1, r2, r3
   6:   18d1        adds    r1, r2, r3
    add r8, r9
   8:   44c8        add r8, r9
    add r4, sp, # 20
   a:   ac05        add r4, sp, # 20
    add sp, sp, # 32
   c:   b008        add sp, # 32
    add r7, sp, r7
   e:   446f        add r7, sp
    add sp, r10
  10:   44d5        add sp, sl
    adr r5, label1
  12:   a500        add r5, pc, # 0 ; (adr r5, 14 <label1>)

00000014 <label1>:
  14:   00000028    .word   0x00000028
label1: .int 40
    and r2, r4
  18:   4022        ands    r2, r4
    asr r3, r5, # 15
  1a:   13eb        asrs    r3, r5, # 15
    asr r4, r6
  1c:   4134        asrs    r4, r6
    b label2
  1e:   e000        b.n 22 <label2>
    bgt label2
  20:   dcff        bgt.n   22 <label2>

00000022 <label2>:
label2:
    bic r7, r3
  22:   439f        bics    r7, r3
    bkpt # 0
  24:   be00        bkpt    0x0000
    bl label3
  26:   f000 f800   bl  2a <label3>

0000002a <label3>:
label3:
    blx r9
  2a:   47c8        blx r9
    bx r10
  2c:   4750        bx  sl
    cmn r6, r2
  2e:   42d6        cmn r6, r2
    cmp r5, # 70
  30:   2d46        cmp r5, # 70    ; 0x46
    cmp r4, r1
  32:   428c        cmp r4, r1
    dmb
  34:   f3bf 8f5f   dmb sy
    dsb
  38:   f3bf 8f4f   dsb sy
    eor r3, r0
  3c:   4043        eors    r3, r0
    isb
  3e:   f3bf 8f6f   isb sy
    ldm r0!, {r1, r3, r5}
  42:   c82a        ldmia   r0!, {r1, r3, r5}
    ldm r0, {r0-r4}
  44:   c81f        ldmia   r0, {r0, r1, r2, r3, r4}
    ldr r1, [r2, # 28]
  46:   69d1        ldr r1, [r2, # 28]
    ldr r3, [sp, # 100]
  48:   9b19        ldr r3, [sp, # 100] ; 0x64
    ldr r4, label4
  4a:   4c00        ldr r4, [pc, # 0]   ; (4c <label4>)

0000004c <label4>:
  4c:   0000002a    .word   0x0000002a
label4: .int 42
    ldr r5, [r6, r7]
  50:   59f5        ldr r5, [r6, r7]
    ldrb r7, [r6, # 5]
  52:   7977        ldrb    r7, [r6, # 5]
    ldrb r4, [r3, r2]
  54:   5c9c        ldrb    r4, [r3, r2]
    ldrh r1, [r0, # 10]
  56:   8941        ldrh    r1, [r0, # 10]
    ldrh r0, [r2, r4]
  58:   5b10        ldrh    r0, [r2, r4]
    ldrsb r1, [r3, r5]
  5a:   5759        ldrsb   r1, [r3, r5]
    ldrsh r2, [r4, r6]
  5c:   5fa2        ldrsh   r2, [r4, r6]
    lsl r3, r4, # 5
  5e:   0163        lsls    r3, r4, # 5
    lsl r6, r7
  60:   40be        lsls    r6, r7
    lsr r0, r2, # 4
  62:   0910        lsrs    r0, r2, # 4
    lsr r1, r3
  64:   40d9        lsrs    r1, r3
    mov r2, # 70
  66:   2246        movs    r2, # 70    ; 0x46
    mov r3, r4
  68:   1c23        adds    r3, r4, # 0
    movs r3, r4
  6a:   1c23        adds    r3, r4, # 0
    mov r3, r9
  6c:   464b        mov r3, r9
    mul r5, r6, r5
  6e:   4375        muls    r5, r6
    mvn r7, r0
  70:   43c7        mvns    r7, r0
    nop
  72:   46c0        nop         ; (mov r8, r8)
    orr r1, r2
  74:   4311        orrs    r1, r2
    pop {r0-r3, pc}
  76:   bd0f        pop {r0, r1, r2, r3, pc}
    pop {r2, r4, r6}
  78:   bc54        pop {r2, r4, r6}
    push {r4-r7, lr}
  7a:   b5f0        push    {r4, r5, r6, r7, lr}
    push {r1, r3, r5}
  7c:   b42a        push    {r1, r3, r5}
    rev r6, r7
  7e:   ba3e        rev r6, r7
    rev16 r1, r0
  80:   ba41        rev16   r1, r0
    revsh r3, r2
  82:   bad3        revsh   r3, r2
    ror r5, r4
  84:   41e5        rors    r5, r4
    sbc r4, r0
  86:   4184        sbcs    r4, r0
    sev
  88:   bf40        sev
    stm r0!, {r2-r5}
  8a:   c03c        stmia   r0!, {r2, r3, r4, r5}
    str r3, [r5, # 28]
  8c:   61eb        str r3, [r5, # 28]
    str r4, [sp, # 88]
  8e:   9416        str r4, [sp, # 88]  ; 0x58
    str r5, [r6, r7]
  90:   51f5        str r5, [r6, r7]
    strb r0, [r3, # 6]
  92:   7198        strb    r0, [r3, # 6]
    strb r1, [r4, r7]
  94:   55e1        strb    r1, [r4, r7]
    strh r2, [r5, # 6]
  96:   80ea        strh    r2, [r5, # 6]
    strh r3, [r6, r1]
  98:   5273        strh    r3, [r6, r1]
    sub r4, r7, # 4
  9a:   1f3c        subs    r4, r7, # 4
    sub r5, # 67
  9c:   3d43        subs    r5, # 67    ; 0x43
    sub r6, r4, r2
  9e:   1aa6        subs    r6, r4, r2
    sub sp, sp, # 84
  a0:   b095        sub sp, # 84    ; 0x54
    svc # 246
  a2:   dff6        svc 246 ; 0xf6
    sxtb r0, r1
  a4:   b248        sxtb    r0, r1
    sxth r2, r3
  a6:   b21a        sxth    r2, r3
    tst r4, r5
  a8:   422c        tst r4, r5
    uxtb r6, r7
  aa:   b2fe        uxtb    r6, r7
    uxth r7, r3
  ac:   b29f        uxth    r7, r3
    wfe
  ae:   bf20        wfe
    wfi
  b0:   bf30        wfi
    mrs r1, xpsr
  b2:   f3ef 8103   mrs r1, PSR
    msr msp, r0
  b6:   f380 8808   msr MSP, r0

I don't think that for the ARM v6M instructions, there are any instructions that differ only as to whether or not they affect the flags.

My assembler does check for the opposite, if a trailing 's' has been specified but the instruction is not sign affecting it issues an error.

rtrussell commented 2 years ago

I have been following what the GNU assembler does and if unspecified emit the sign affecting instruction if that is the only one available.

It doesn't do that for me. If I try to assemble this instruction:

ADD r1, r2, r3

GNU as (with -march=armv6-m switch) reports this:

armv6mtest.s: Assembler messages:
armv6mtest.s:4: Error: cannot honor width suffix -- `add r1,r2,r3'

I don't think that for the ARM v6M instructions, there are any instructions that differ only as to whether or not they affect the flags.

How about these (output from GNU assembler):

   3 0000 1544      ADD r5, r5, r2
   4 0002 AD18      ADDS r5, r5, r2
Memotech-Bill commented 2 years ago

What compiler directives (if any) are you using in your source file?

rtrussell commented 2 years ago

What compiler directives (if any) are you using in your source file?

Only:

.syntax unified
.text
Memotech-Bill commented 2 years ago

Try without .syntax unified

rtrussell commented 2 years ago

Try without .syntax unified

armv6mtest.s: Assembler messages:
armv6mtest.s:3: Error: instruction not supported in Thumb16 mode -- `adds r0,r0,r1'
armv6mtest.s:5: Error: instruction not supported in Thumb16 mode -- `adds r0,r1'

I would refer you to ARM's own documentation for that CPU, which lists amongst the valid instructions:

All registers Lo | ADDS Rd, Rn, Rm | 1
Any to Any       | ADD Rd, Rd, Rm  | 1

So, unless I'm missing something, both those instructions are legitimate. It wouldn't be too difficult to test them both on the Pico itself (by poking the hex opcodes into memory); I can try that if you think it would be helpful.

rtrussell commented 2 years ago

I can try that if you think it would be helpful.

OK, I tried it using this program:

   10 DIM P% 11
   20 P% = (P% + 3) AND -4
   30
   40 PRINT "Testing 'ADDS R0,R0,R1' instruction, &1840:"
   50 P%!0 = &47701840
   60
   70 A% = 1234
   80 B% = 5678
   90 C% = USR(P%+1)
  100 PRINT "According to the Pico's CPU 1234 + 5678 = "; C%
  110
  120 PRINT "Testing 'ADD R0,R0,R1' instruction, &4408:"
  130 P%!4 = &47704408
  140
  150 A% = 1234
  160 B% = 5678
  170 C% = USR(P%+5)
  180 PRINT "According to the Pico's CPU 1234 + 5678 = "; C%

With this result:

Testing 'ADDS R0,R0,R1' instruction, &1840:
According to the Pico's CPU 1234 + 5678 = 6912
Testing 'ADD R0,R0,R1' instruction, &4408:
According to the Pico's CPU 1234 + 5678 = 6912

So I would say that's pretty conclusive evidence that both instructions work on the Pico.

rtrussell commented 2 years ago

For comparison, this is what your assembler is outputting for those same two instructions:

0A6A1C04 1840                 adds r0,r0,r1
0A6A1C06 1840                 add r0,r0,r1

Interestingly, if I try it with the 'alternative' 2-register syntax I get:

0A6A1C08 4408                 adds r0,r1
0A6A1C0A 4408                 add r0,r1

So it looks as though the encoding your assembler generates depends on which syntax is used, not on whether you want the flags to be affected or not!

Memotech-Bill commented 2 years ago

I have now committed a new file bbasm_arm_v6mu.c (note the 'u' at the end of the filename) which uses unified syntax.

I will continue to consider whether I can add an option to support both forms.

rtrussell commented 2 years ago

I have now committed a new file bbasm_arm_v6mu.c

Thanks, your continued efforts are appreciated. Something doesn't seem quite right though:

>[adds r0,r0,r1
0B6B20A4 1840                 adds r0,r0,r1
>[add r0,r0,r1
Invalid register

I will continue to consider whether I can add an option to support both forms.

My view is that there is no harm in forcing BBC BASIC users to use unified syntax, if it makes your life easier; it's what ARM prefers to be used for the Cortex-M0+. But equally if you prefer to support both forms, that's OK too. If there was an easy way to incorporate an #ifdef so that it could be a build option, better still!

Memotech-Bill commented 2 years ago

The ADD bug should be fixed with my latest commit.

rtrussell commented 2 years ago

The ADD bug should be fixed with my latest commit.

Great!

An issue I have spotted is that none of the instructions BLE, BLO, BLS or BLT is recognised. That they all start with BL seems suspicious:

[BLE P%
No such variable

This is a question rather than a criticism: the GNU assembler accepts instructions like:

adds r7,r7,#-18

which it silently converts to:

subs r7,r7,#18

Do you think the BBC BASIC assembler should, or could, do the same?

Memotech-Bill commented 2 years ago

Yes the conditional branches were being confused with BL. Fixed.

This is a question rather than a criticism: the GNU assembler accepts instructions like: adds r7,r7,#-18 which it silently converts to: subs r7,r7,#18 Do you think the BBC BASIC assembler should, or could, do the same?

Seems reasonable. Done.

rtrussell commented 2 years ago

Seems reasonable. Done.

I've confirmed that Acorn's assembler (at least, the version I have) doesn't do that, and nor does the assembler that is shipped with the (32-bit) Raspberry Pi and Android editions of BBC BASIC. But it certainly doesn't do any harm.

I think your 32-bit opcodes have the wrong byte order. For example DMB is encoding as (showing the bytes individually in order):

5F 8F BF F3

whereas the GNU assembler is generating:

BF F3 5F 8F

and ARM's own documentation shows (as two 16-bit shorts):

1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 (F3BF)
1 0 0 0 1 1 1 1 0 1 0 1 option  (8F5x)

Edit: I also think your listing would be clearer if you showed it as two 16-bit values with a space in between.

Memotech-Bill commented 2 years ago

The Architecture Reference Manual shows the instructions almost as 32-bit values:

image

Which on a little-endian system would put the F3 as the last byte.

However, presumably the GNU assembler is correct.

rtrussell commented 2 years ago

The Architecture Reference Manual shows the instructions almost as 32-bit values:

If it was a 32-bit value the bits would be labelled from 31 on the left to 0 on the right. As it is they're labelled as two 16-bit values: bits 15 to 0 and then 15 to 0 again (with a dividing line). Unlike ARM-32, which has 32-bit opcodes, Thumb2 has 16-bit opcodes (some instructions have one, some instructions have two).

That's why I think the listing would also be clearer if everything was shown as 16-bit (or 8-bit if you prefer), so either:

0B6B21FC F3BF 8F5F DMB

or

0B6B21FC BF F3 5F 8F DMB

Memotech-Bill commented 2 years ago

Done.

rtrussell commented 2 years ago

Done.

Thanks again. We're definitely getting closer: I'm now seeing a difference between your assembler and GNU for the CPSID i instruction. Yours is giving:

[CPSID i
0B6B2208 62 B6                CPSID i

But GNU has:

  38 0048 72B6      CPSID i
  39 004a 71B6      CPSID f
  40 004c 73B6      CPSID if
  41 004e 62B6      CPSIE i
  42 0050 61B6      CPSIE f
  43 0052 63B6      CPSIE if

In other words it reckons your opcode is for CPSIE i (enable rather than disable).

rtrussell commented 2 years ago

Whilst you're at it, can you also check the encoding of BL because it looks to me that reversing the words has changed that from being correct to incorrect. Perhaps your '32-bit' instructions always were inconsistent in their byte order. :-(

Memotech-Bill commented 2 years ago

Yet another version committed.

rtrussell commented 2 years ago

Yet another version committed.

Not there yet. LDRSH (register) seems to be misbehaving:

LDRSH r4, [r5, r1]

Your assembler is giving:

6C 57

But GNU thinks it's:

6C 5E

ARM's documentation says (section 4.6.65):

0 1 0 1 1 1 1 Rm Rn Rt

So the only possibilities are 5Exx or 5Fxx.

Memotech-Bill commented 2 years ago

Typo. Fixed.

rtrussell commented 2 years ago

Typo. Fixed.

Getting really close now, as far as my tests can ascertain. The only things that seem wrong are some directives, for example:

equb seemingly incrementing the program counter by 2 rather than 1:

[equb &12 : equb &34 : equb &56
0B6B2310 12                   equb &12
0B6B2312 34                   equb &34
0B6B2314 56                   equb &56

align printing some spurious hex to the listing when no padding is needed:

[align 4
0B6B22F0 11 80 00 00          align 4
Memotech-Bill commented 2 years ago

Hopefully fixed.

rtrussell commented 2 years ago

Hopefully fixed.

I'm pleased to say I haven't found any other encoding errors. If you're worried about such things, this error message is (at first glance) unhelpful:

[add r7,r7,#1
Invalid register

It's not the register that's invalid here (there's no alternative register that would make it work, except SP) but the use of add rather than adds.

Memotech-Bill commented 2 years ago

It's not the register that's invalid here (there's no alternative register that would make it work, except SP) but the use of add rather than adds.

The trouble is that

add r7, r7, #1
add r9, r9, #1

both hit the same error trap. Only the first one would be valid for adds

I am aware of one potential issue:

    align 4
    equb 1
.label adds r0, r0, #2

The label will be evaluated at the odd address before the PC is aligned up to the next even address to insert the instruction.

Memotech-Bill commented 2 years ago

I have pushed a small amendment so that the label misalignment is at least visible:

017B89BE 00 BF                align 4
017B89C0 01                   dcb 1
017B89C1 00 01 30    .label   adds r0, r0, #1
Memotech-Bill commented 2 years ago

I think I can see how to make a label address match the location of the following opcode. However, consider:

    align 4
    equb 1
.label1
.label2 adds r0, r0, #1 

With the modification I am considering, label2 would have the even address of the adds instruction, but label1 will have an odd address. Comments?

rtrussell commented 2 years ago

The label will be evaluated at the odd address before the PC is aligned up to the next even address to insert the instruction.

Checking other versions of BBC BASIC:

Acorn's ARM BASIC V (the 'reference'): acorn

BBC BASIC for SDL 2.0 (32-bit ARM):

DIM P% 100
[align : equb 1 : .label adds r0,r0,#2
0A691C04                      align
0A691C04 01                   equb 1
0A691C08 E2900002 .label      adds r0,r0,#2
>PRINT ~label
   A691C05

So this is expected behaviour and should not be changed.

Edit: Incidentally, the GNU assembler treats align (without a parameter) as align 4, which seems correct to me so all ARM architectures are consistent. Are you doing the same?

rtrussell commented 2 years ago

So this is expected behaviour and should not be changed.

However there is a bug. On initial entry to the assembler (i.e. after [) the program counter should be aligned. It's important that code like this always sets the label to the address of the instruction and not to an unaligned address:

[.label nop

Currently your assembler does not ensure this.

rtrussell commented 2 years ago

However there is a bug. On initial entry to the assembler (i.e. after [) the program counter should be aligned.

This turns out to be a regression. In bbasmb_arm_32.c and bbasmb_arm_64.c there's a Boolean init which specifically ensures that if a label is declared immediately after the assembler is started, the program counter is aligned. It looks like you took that out; why? It's a better solution than unconditionally aligning because it allows this:

[equb &12
0B6B1C03 12                   equb &12
[equb &34
0B6B1C04 34                   equb &34
[equb &56
0B6B1C05 56                   equb &56

I would encourage you to reinstate the init functionality.

Memotech-Bill commented 2 years ago

Resinstated init functionality. Align without argument was doing align 2. Now changed to align 4.

I have also committed another version of the assembler in file bbasmb_arm_v6m2.c. This version also uses unified syntax by default, however adding the pseudo-op:

syntax d

enables the following extensions to the allowed syntax:

The extensions may be disabled again by specifying:

syntax u

This code has been derived from the checked bbasmb_arm_v6mu.c and hopefully there are no reversions.

The original buggy bbasmb_arm_v6m.c has been deleted.

rtrussell commented 2 years ago

Resinstated init functionality. Align without argument was doing align 2. Now changed to align 4.

Thanks again; I'm not aware of any errors in the assembler now. There is one remaining issue that I don't know how to resolve:

Judging by what happens currently with the ARM32 assembler on the Pico, it is likely that your assembler will result in alignment faults when writing to non-multiple-of-4 addresses. But I don't understand why; the poke() routine uses memcpy() to write to memory, and I thought we had established that it is alignment-safe except when inlined. And since the byte count is in a variable rather than a constant, I would not expect it to be inlined.

Can you think of any reason why poke() (or anything else in your assembler) might cause an alignment fault, and how to fix it?

Memotech-Bill commented 2 years ago

Having switched back to the Pico I am currently having all sorts of alignment problems in areas that I thought were working. I am investigating.

rtrussell commented 2 years ago

I am investigating.

If poke() refuses to play ball, a couple of things that could be tried (neither very nice):

Memotech-Bill commented 2 years ago

The problems were not in the assembler at all.

All the variables defined in bbdata_arm32.s had become unaligned. Changing the order of the files in CMakeLists.txt to put that file first seems to have resolved the issue.

I did contemplate putting

.align 4

at the top of that file.

rtrussell commented 2 years ago

All the variables defined in bbdata_arm32.s had become unaligned.

Hmm, I thought that each object module was independently aligned by the linker (probably on at least a 16-byte boundary); I'm almost certain that's the behaviour in Windows, because .align directives only align things with respect to the start of that module, so the module must be aligned as well.

Anyway, that doesn't help with the alignment faults arising from the assembler. I presume that you're seeing them on the Pico in the same way as I am with the ARM32 assembler?

Memotech-Bill commented 2 years ago

I have spent most of the afternoon getting the Pico to work at all.

Having finally succeeded, no alignment faults seen so far in the assembler.

rtrussell commented 2 years ago

no alignment faults seen so far in the assembler.

Odd. With the ARM32 assembler I'm getting an instant alignment fault from this:

BBC BASIC for Pico Console v0.37
(C) Copyright R. T. Russell, 2021
>DIM P% 100
>[equw &1234 : equw &5678
Data alignment fault sr = 0x01000000
R0  = 0x0000003a  R8  = 0x20007703
R1  = 0x20041ce0  R9  = 0x20000858
R2  = 0x20007703  R10 = 0x2000640d
R3  = 0x20007703  R11 = 0x20027700
R4  = 0x00000002  R12 = 0x20007703
R5  = 0x0000000e  SP  = 0x20041cb8
R6  = 0x20007703  LR  = 0x1000b05d
R7  = 0x20006401  PC  = 0x1000b19a
 -> unaligned access to 0x20007703

Given that I don't believe the code for equw is different in your version I wonder what accounts for this. Of course I'm using Eric's build, but I assume you and he are using the same version of the SDK.

rtrussell commented 2 years ago

If you want to test the assembler on the Pico you can try running this program:

      REM Hello world! assembler example for the Raspberry Pi Pico
      REM Richard Russell, http://www.rtrussell.co.uk, 16-Oct-2021

      DIM code% 100, L% -1
      FOR pass% = 8 TO 11 STEP 3
        P% = code%
        [opt pass%
        .hello
        push {r6-r7,lr}
        ldr r6,oswrch
        adr r7,message    ; pointer to text
        .loop
        ldrb r0,[r7]      ; get next character
        adds r7,r7,#1     ; bump pointer
        cmp r0,#0         ; is it a NUL ?
        beq finish        ; if so exit
        blx r6            ; output character
        b loop            ; continue
        ;
        .finish
        pop {r6-r7,pc}    ; return to caller
        ;
        align
        .oswrch
        equd "oswrch"
        .message
        equs "Hello world!"
        equw &0A0D
        equb 0
        ]
      NEXT pass%

      CALL hello+1 : REM Add one because of Thumb code
Memotech-Bill commented 2 years ago

Result posted on the Raspberry Pi Forum

Memotech-Bill commented 2 years ago

The assembler appears to be working now.