jcmvbkbc / gcc-xtensa

gcc for xtensa
GNU General Public License v2.0
131 stars 58 forks source link

error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

Open mroavi opened 7 years ago

mroavi commented 7 years ago

I found a very interesting I2C master implementation written in assembly language for the ESP8266 (https://github.com/pasko-zh/brzo_i2c). However, as the author mentions, the library is written for the Arduino toolchain.

I made some slight changes in order to compile it with the newest SDK from Espressif: ESP8266_NONOS_SDK_V2.0.0_16_07_19.

However, I haven't been able to get past this compiler error:

brzo_i2c.c: In function 'brzo_i2c_read':
modules/brzo_i2c.c:413:2: error: can't find a register in class 'RL_REGS' while reloading 'asm'
  asm volatile (
  ^
modules/brzo_i2c.c:413:2: error: 'asm' operand has impossible constraints
make: *** [build/modules/brzo_i2c.o] Error 1

14:46:16 Build Finished (took 162ms)

The CFLAGS variable in my Makefile looks like this:

# compiler flags using during compilation of source files
CFLAGS      =   -g          \
                -Wpointer-arith     \
                -Wundef         \
                -Wl,-EL         \
                -fno-inline-functions   \
                -nostdlib       \
                -mlongcalls \
                -mtext-section-literals \
                -ffunction-sections \
                -fdata-sections \
                -fno-builtin-printf\
                -DICACHE_FLASH \
                -DBUID_TIME=\"$(DATETIME)\" \
                -std=gnu89

This is the inline assembler code that it points to:

asm volatile (
        // Disable all interrupts, i.e. interrupts up to the highest interrupt level of 15
        //   the current level is saved in %[r_temp1] but we will not use that value again,
        //   instead we will just enable all interrupt levels at the end of this routine
        "RSIL   %[r_temp1], 15;"
        "MOVI   %[r_set], 0x60000304;"

        // Check if bus is free and send START
        "OR     %[r_temp1], %[r_sda_bitmask], %[r_scl_bitmask];"
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h = > in: 0x60000318
        "MEMW;"
        "MOVI.N %[r_error], 1;"
        // If either SDA or SCL is low, then bus is not free and thus jump to l_exit
        "BNALL  %[r_in_value], %[r_temp1], l_exit;"
        // Bus is free, so we can send START
        "MOVI.N %[r_error], 0;"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // Set SCL = 1
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        // Set SDA = 0
        // Delay for tHD;STA  >= 4.0 usec for standard mode, 0.6 usec for fast or 0.26 usec fast mode plus
        //  => a delay of one half cycle is enough to meet those timings
        "S16I   %[r_sda_bitmask], %[r_set], 4;"  // clear: 0x60000308
        "l_w01:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w01;"
        // Post Condition: SDA = 0, SCL = 1

        // The outer loop, sending 1...n data bytes
        "l_send_byte:"
        // select the MSB of byte_to_send
        "MOVI   %[r_bit_index], 128;"
        // The inner loop, sending 1...8 bits
        "l_send_bit:"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // check if the bit of byte_to_send at bit_index is 0 or 1
        "BALL   %[r_byte_to_send], %[r_bit_index], l_sda1_scl0;"
        // SDA = 0, SCL = 0
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear: 0x60000308
        "MEMW;"
        "S16I   %[r_sda_bitmask], %[r_set], 4;" // clear: 0x60000308
        "l_w02:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w02;"
        "j l_sdax_scl1;"

        "l_sda1_scl0:"
        // SDA = 1, SCL = 0
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear: 0x60000308
        "MEMW;"
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "l_w03:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w03;"

        "l_sdax_scl1:"
        // SDA = leave unchanged and set SCL = 1
        // Check for clock stretching
        // Delay is little bit shorter, i.e. half_cycle - delta
        "ADDI   %[r_temp1], %[r_iteration_scl_halfcycle], -5;"
        // Let SCL raise
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w04:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        // Explicitly BGEZ instead of BNEZ
        "BGEZ   %[r_temp1], l_w04;"

        // Sample SCL value
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h = > in: 0x60000318
        "MEMW;"
        // r_temp1 holds the number of iterations for clock stretch timeout
        "MOV.N  %[r_temp1], %[r_iteration_scl_clock_stretch];"
        // Branch if SCL = 1, i.e. no stretching
        "BALL   %[r_in_value], %[r_scl_bitmask], l_no_stretch;"
        // SCL = 0, i.e. stretching by the slave, i.e. it pulls SCL low
        "l_stretch:"
        // Sample SCL value
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h = > in: 0x60000318
        "MEMW;"
        // Branch if SCL = 1, i.e. no more stretching
        "BALL   %[r_in_value], %[r_scl_bitmask], l_scl_high_by_slave;"
        // SCL is still low
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        // Did we reach the clock stretch timeout?
        // Branch if we have not yet reached the timeout
        "BNEZ   %[r_temp1], l_stretch;"
        // We have reached the clock stretch timeout, i.e. SCL is still pulled low by the slave
        // Error: Bus is not free, since SCL is still low AND clock stretch timeout reached
        "MOVI.N %[r_error], 8;"
        // We explicitly do not send a STOP instead we exit, i.e. jump to l_exit and not to l_send_stop
        "j l_exit;"

        "l_scl_high_by_slave:"
        // SCL was set high by the slave
        // We have to make sure that SCL = 1 for a complete half cycle
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "l_w041:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w041;"

        "l_no_stretch:"
        // Postcondition: SCL = 1 for a half cycle
        // Are there bits left that we need to send?
        "SRLI   %[r_bit_index], %[r_bit_index], 1;"
        // When the LSB of the byte_to_send was sent, i.e. bit index was 1 before SRLI, it will now be zero
        // As long as the LSB was not sent keep on sending bits, i.e. jump
        "BNEZ   %[r_bit_index], l_send_bit;"
        // we have sent 8 Bits

        // check for ACK by slave
        // Precondition
        // SDA = LSB (i.e. SDA = 0, since we have an i2c write), SCL = 1
        // SCL = 0
        // Spike reducing waits here
        "S16I   %[r_scl_bitmask], %[r_set], 4;"  // clear : 0x60000308
        "MOV.N  %[r_temp1], %[r_iteration_minimize_spike];"
        "l_w05:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w05;"
        // Reduce number of iterations by the ones we've already used
        "SUB    %[r_temp1], %[r_iteration_scl_halfcycle], %[r_iteration_minimize_spike];"
        // Now we let SDA raise.
        // In case of an ACK the i2c slave is pulling SDA down
        // In case of an NACK, SDA raises
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w06:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BGEZ   %[r_temp1], l_w06;"

        // Delay is little bit shorter, i.e. half_cycle - delta
        // Because we will have a L16UI after in this half cycle
        "ADDI   %[r_temp1], %[r_iteration_scl_halfcycle], -5;"
        // Set SCL = 1, i.e. start of the second half cycle of the 9th SCL cycle
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        // Delay for the second half cycle of the 9th SCL cycle
        "l_w07:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BGEZ   %[r_temp1], l_w07;"
        // Sample SDA at the end of the 9th clock cycle
        // In the case of an NACK we want to leave enough time that SDA can raise
        // If sda_value AND sda_bitmask == 0 => ACK else we have an NACK
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h = > in: 0x60000318
        "BNALL  %[r_in_value], %[r_sda_bitmask], l_slave_ack;"
        "MOVI.N %[r_error], 2;"
        // NACK by slave
        // Postcondition:
        //   SDA = 1 (NACK) and SCL = 1
        //   9th Clock Cycle is finished
        "j l_send_stop;"

        "l_slave_ack:"
        // ACK
        // Precondition: SDA = 0 (still pulled low by the slave) and SCL = 1
        // The slave will pull SDA low as long as SCL = 1
        // We have to set SDA = 0 by the master
        // clear : 0x60000308
        "S16I   %[r_sda_bitmask], %[r_set], 4;"
        // Postcondition:
        //   SDA = 0 and SCL = 1
        //   9th Clock Cycle is finished

        "BEQZ   %[r_no_of_bytes], l_send_stop;"
        // Branch if there are no more Data Bytes to send
        // Load the corresponding element of array data[.] into byte_to_send
        "L8UI   %[r_byte_to_send], %[r_adr_array_element], 0;"
        // Move the pointer to the next array element (since we have an array of bytes, the increment is 1)
        "ADDI.N %[r_adr_array_element], %[r_adr_array_element], 1;"
        // Decrement the number of bytes to send
        "ADDI.N %[r_no_of_bytes], %[r_no_of_bytes], -1;"
        "j l_send_byte;"

        "l_send_stop:"
        // Send Stop
        // We have to make sure that SDA = 0 and SCL = 1, before we send the STOP sequence,
        //   i.e. "A LOW to HIGH transition on the SDA line while SCL is HIGH"
        // In order to achieve this econdition, we have to distinguish between
        //   1) NACK: SDA = 1, SCL = 1
        //   2) ACK: SDA = 0, SCL = 1
        //      SDA is still pulled low by the slave, so we have to signal the slave to release it.
        //      We will do this by letting SCL go low.
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // if we had a NACK then r_error = 2
        // if we had an ACK then r_error = 0
        "BNEZ.N %[r_error], l_stop_after_NACK;"
        // Send stop after ACK
        // Precondition: SDA = 0, SCL = 1

        // We are at the beginning of the 10th cycle (if there was no clock stretching)
        // Set SCL = 0
        // During the first half cycle the slave should release SDA...
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear : 0x60000308
        "MEMW;"
        "l_w08:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w08;"

        // Check for a repeated start
        // Branch if r_repeated is 0, i.e. is no repeated start, just send stop
        "BEQZ.N %[r_repeated], l_no_repeated_start;"
        // Make sure that the precondition for the next command (i.e. the start) will be met
        // Currently, SCL = 0 and SDA is starting to raise, since the slave has released it
        // To be on the safe side, we set both SCL = 1 _and SDA = 1
        // SDA  = 1
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        // SCL = 1;
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        // Postcondition: SCL = 1 and SDA = 1, now the next i2c command send start
        "j l_exit;"

        "l_no_repeated_start:"
        // For the second half cycle, we set SDA = 0, SCL = 1
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "S16I   %[r_sda_bitmask], %[r_set], 4;" // clear : 0x60000308
        "MEMW;"
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "l_w09:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w09;"

        // For the first half cycle of the 11th cycle, we set SDA = 1 and leave SCL = 1
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // SDA = 1 (SCL is already high, we don't need to change it)
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w10:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w10;"
        "j l_exit;"

        "l_stop_after_NACK:"
        // Send stop after NACK
        // Precondition: SDA = 1, SCL = 1

        // SDA = 0
        // SCL = 1 : In "normal" cycles we woud set SCL to 0
        "S16I   %[r_sda_bitmask], %[r_set], 4;"  // clear: 0x60000308
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        // Delay for the first half cycle of 10th cycle
        "MEMW;"
        "l_w11:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w11;"
        // Postcondition: SDA = 0 and SCL = 1

        // Now we set SDA = 1 and leave SCL = 1 : This ist the STOP condition,
        //   i.e. the "A LOW to HIGH transition on the SDA line while SCL is HIGH"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        // SDA = 1 (SCL is already high, we don't need to change it)
        "MEMW;"
        "l_w12:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w12;"

        "l_exit:"
        // Enable all interrupts again, i.e. interrupts with interrupt level >= 1
        "RSIL   %[r_temp1], 0;"

        : [r_set] "+r" (a_set), [r_repeated] "+r" (a_repeated), [r_temp1] "+r" (a_temp1), [r_in_value] "+r" (a_in_value), [r_error] "+r" (i2c_error), [r_bit_index] "+r" (a_bit_index), [r_adr_array_element] "+r" (&data[0]), [r_byte_to_send] "+r" (byte_to_send), [r_no_of_bytes] "+r" (no_of_bytes)
        : [r_sda_bitmask] "r" (sda_bitmask), [r_scl_bitmask] "r" (scl_bitmask), [r_iteration_scl_halfcycle] "r" (iteration_scl_halfcycle), [r_iteration_minimize_spike] "r" (iteration_remove_spike), [r_iteration_scl_clock_stretch] "r" (iteration_scl_clock_stretch)
        : "memory"
    );

I would really appreciate it if you could help me understand what is going wrong and what options there are to solve it. Thanks in advance.

jcmvbkbc commented 7 years ago

That looks interesting. One workaround that I see is the switch -fomit-frame-pointer, which frees a15. But the assembly only wants 14 registers, which we have even when the frame pointer is in use. Let me look at it some more.

mroavi commented 7 years ago

I finally managed to compile it.

Adding the -O2 option to the CFLAGS did the trick:

# compiler flags using during compilation of source files
CFLAGS      =   -g -O2          \
                -Wpointer-arith     \
                -Wundef         \
                -Wl,-EL         \
                -fno-inline-functions   \
                -nostdlib       \
                -mlongcalls \
                -mtext-section-literals \
                -ffunction-sections \
                -fdata-sections \
                -fno-builtin-printf\
                -DICACHE_FLASH \
                -DBUID_TIME=\"$(DATETIME)\" \
                -std=gnu89

It also works with -g -O1 but not with -g -O0.