phicore commented 2 months ago

Describe the bug

Exact same context as #56.

If we do not open and close the mailboxes between each transfer, the drivers seems to act like it receives input several messages in bursts.

See screenshot Untitled2

To Reproduce

we launch the driver the following way:

'dev-can-linux -q -U3 -e 1c:08,0x05 -b id=3,freq=125k,btr0=0x07,btr1=0x14 &'

Then we launch our can-loop program (see #56).

Platform

Target QNX architecture: Arm64
CAN-bus hardware device: PCAN-miniPCIe card
Version: QNX 7.1

Driver

Driver loaded: peak_pci
Branch: main
Version: 1.2.0

Code


void *fctThreadCan3( void *arg )
{
    CAN_DEF_ERREUR ret_fct = CAN_NODEFINE;
    CmdCan_t vCmdCan;

    memset(&vCmdCan.vTrame,0, sizeof(trameCan_t));
    vCmdCan.dev_gstCanId = fctMgrOpen("/dev/can3");
    if ( vCmdCan.dev_gstCanId == -1 )
    {
        printf(" fctMgrOpen /dev/can3  failed\n");
        exit(EXIT_FAILURE);
    }
    delay(1);

    printf("can3 - entering thread \n");
    while(1)
    {
    //  memset(&vCmdCan.vTrame,0, sizeof(trameCan_t));
    //  vCmdCan.dev_gstCanId = fctMgrOpen("/dev/can3");
    //  if ( vCmdCan.dev_gstCanId == -1 )
    //  {
    //      printf(" fctMgrOpen /dev/can3  failed\n");
    //      exit(EXIT_FAILURE);
    //  }
    //  delay(1);
        ret_fct = fctCmdRxD (vCmdCan.dev_gstCanId, &vCmdCan.vTrame, 1000000);
        if ( ret_fct == CAN_NOERR )
        {
            // Only answer if message ID bit 11 to 8 contain the can interface number (3) 
            if( ((vCmdCan.vTrame.idCan & 0x7FF) >> 8) == 3)
            {
                // Send back with incremented mid
                vCmdCan.vTrame.idCan++;
                if( ((vCmdCan.vTrame.idCan) & 0xFF) > 0xFF)
                {
                    vCmdCan.vTrame.idCan = 0x300;
                }
            vCmdCan.vTrame.queue = CAN_LPQUEUE;
            fctSendCmdTxD(vCmdCan.dev_gstCanId, &vCmdCan.vTrame);   
            }
        }
        delay(2);
        //fctMgrClose(vCmdCan.dev_gstCanId);
        //delay(1); 
    }
    fctMgrClose(vCmdCan.dev_gstCanId);
    delay(1);   
}

We need to perform the open/close for each loop iteration (commented parts) to have correct behaviour.

Open/close functions here:


#define MAX_CAN_DEVICES 4
#define CAN_DEVICE_PATH_MAX 16

struct canDevice
{
    int rx;
    int tx;
    bool open;
int dev_id;
};

static struct canDevice canDevices[MAX_CAN_DEVICES] = {};

static int fctMgrOpenMailbox(const char *dev, const char *mailbox)
{
    int fd = -1;
    char mailboxPath[CAN_DEVICE_PATH_MAX];

    if (snprintf(mailboxPath, CAN_DEVICE_PATH_MAX, "%s/%s", dev, mailbox) >= CAN_DEVICE_PATH_MAX)
    {
        return fd;
    }

    if(mailbox[0]=='r')
        fd = open(mailboxPath, O_RDONLY);
    else
        fd = open(mailboxPath, O_WRONLY);

    return(fd);
}

int fctMgrOpen(const char * dev)
{
int deviceindex = -1; // we need to find the CANx value from the /dev string.

    for (int i = 0; i < MAX_CAN_DEVICES; i++)
    {
        if (canDevices[i].open)
        {
            continue;
        }

        if((dev[8] >='0') &&  (dev[8] <='4'))
        {
            deviceindex = dev[8] -'0' ;
        }

        int rx = fctMgrOpenMailbox(dev, "rx0");
        if (rx == -1)
        {
            fprintf(stderr, "(open) %s rx0, %s\n", dev, strerror(errno));
            return -1;
        }

        int tx = -1;
        char tx_mailbox[] = "tx?";

        // depending if we are on Flexcan or PEAK, the tx mailbox is tx1 or tx0.
        //
        for (int tx_id = '0'; tx_id <= '1'; tx_id++)
        {
            tx_mailbox[2] = tx_id;
            tx = fctMgrOpenMailbox(dev, tx_mailbox);
            if (tx != -1)
            {
                break;
            }
        }

        if (tx == -1)
        {
            return -1;
        }

        canDevices[i] = (struct canDevice)
        {
            .rx = rx,
            .tx = tx,
            .dev_id = deviceindex,
            .open = true,
        };

        return i;
    }
    return -1;
}

int fctMgrClose(int fd)
{
    if (fd >= 0 && fd < MAX_CAN_DEVICES && canDevices[fd].open)
    {
        canDevices[fd].open = false;
        close(canDevices[fd].rx);
        close(canDevices[fd].tx);
        fd=-1;
    }
    return(-1);
}

Deniz-Eren commented 2 months ago

@phicore can you please post code for functions fctCmdRxD and fctSendCmdTxD also?

Deniz-Eren commented 2 months ago

@phicore one initial comment; when you send message on /dev/can3/tx0 that same message is echoed to /dev/can3/rx0.

Could this be causing the issue reported?

phicore commented 2 months ago

@phicore can you please post code for functions fctCmdRxD and fctSendCmdTxD also?

Sure, here they are.


CAN_DEF_ERREUR fctCmdRxD (int fd, trameCan_t * Trame, unsigned int TimeOut)
{
    // Check whether fd is in bounds and device entry is marked open?
    if (fd >= 0 && fd < MAX_CAN_DEVICES && canDevices[fd].open)
    {
        CAN_DEF_ERREUR     status = CAN_NODEFINE;
        uint64_t           nTime;
        struct sigevent    event;
        struct can_msg     canmsg;
        int                error;

        nTime = (uint64_t)TimeOut * (uint64_t)1000000;
        event.sigev_notify = SIGEV_UNBLOCK ;
        TimerTimeout(CLOCK_MONOTONIC, _NTO_TIMEOUT_SEND | _NTO_TIMEOUT_REPLY, &event, &nTime, NULL) ;

        memset(&canmsg, 0, sizeof(struct can_msg));
        error = read_frame_raw_block(canDevices[fd].rx, &canmsg);

        // Convert to trame_can_t structure
        // if data in low 12 bits copy as is
        if ( (canmsg.mid & 0x00000FFF) !=0)
            Trame->idCan = (canmsg.mid & 0x000007FF);

        // if in upper area then copy to low word
        if ( (canmsg.mid & 0x0FFF0000) !=0)
            Trame->idCan = ((canmsg.mid >>18) & 0x7FF);

        Trame->len = canmsg.len;
        memcpy(Trame->tabCan,canmsg.dat,8); 

        if( ( error != EOK ) && ( error != EINTR ) )
        { 
            fprintf(stderr, "Err fctCmdRxD, (read_frame_raw_block) code: %d %s\n",error, strerror(error));
        } 
        // Test timeout and `error` value on timeout.
        else if ( ( error == EINTR ) ||  ( error == ETIME ))
        {
            status = CAN_TIMEOUT;
        }
        else
        {
            status = CAN_NOERR;
        }
        return(status);
    }
    else
        return (-1);
}


int fctSendCmdTxD(int fd, trameCan_t * Trame)
{
    // Check whether fd is in bounds and device entry is marked open?
    if (fd >= 0 && fd < MAX_CAN_DEVICES && canDevices[fd].open)
        {
            struct can_msg canmsg;
            memset(&canmsg, 0, sizeof(struct can_msg));

            canmsg.len = Trame->len;

            //printf("fd: %d deviceindex: %d", fd,canDevices[fd].dev_id);

#if 0    // Needed for 1.0.26 dev-can-linux driver          
            if(canDevices[fd].dev_id == PEAK_CANINDEX)
            {
                // PEAK driver does not respect QNX mapping
                canmsg.mid = Trame->idCan;
            }
            else
#endif          
            {
                // QNX needs standard MID in bits 18 to 29
                canmsg.mid = (Trame->idCan) << 18;
            }           
            canmsg.ext.is_extended_mid = 0;
            memcpy(&canmsg.dat, Trame->tabCan, 8);
            return write_frame_raw(canDevices[fd].tx, &canmsg);
        }
    else
        return (-1);
}

phicore commented 2 months ago

Sorry didn't meant to close

phicore commented 2 months ago

@phicore one initial comment; when you send message on /dev/can3/tx0 that same message is echoed to /dev/can3/rx0.

Could this be causing the issue reported?

The goal of the can-loop program is to answer back the same message with an incremented MID. Is this what you meant ?

The issue here is that for one received message we send out several answers answer instead of just one. The same code on the 3 other integrated CAN interfaces behaves correctly. To solve the issue we found an ugly fix consisting to close and reopen the device for each message. This possibly leads to the other issue we list in #56 in which we observe that by doing so we are limited to sending around 65K messages and then the driver does not seem to handle new messages.

Deniz-Eren commented 2 months ago

The goal of the can-loop program is to answer back the same message with an incremented MID. Is this what you meant ?

I was just letting you know that the current implementation is done such that any messages you send on /dev/can3/tx0 will be received on /dev/can3/rx0, it's an auto echo back feature that exists with Linux implementations and I've repeated it in this QNX implementation. I was wondering if this in-built feature is the cause of your issue report here? If that's the case, then I can make the echo back feature an optional feature somehow.

phicore commented 2 months ago

Interesting, if you can point me to the source file location were I could test to remove it it will be a good test. Thanks a lot

Deniz-Eren commented 2 months ago

That's a good idea. If you do this experiment and see that it fixes the issue, then I can implement more options for users to turn off the echo back feature.

To do the experiment you need to add a small bit of code to function netif_rx; inside the netif_rx function at line161 of netif.c just add the following:

    if (skb->is_echo) {
        return NET_RX_SUCCESS;
    }

Let me know if this fixes your issues so that I can make a permanent feature.

phicore commented 2 months ago

We tried your solution, it works perfectly. Thanks a lot.

Deniz-Eren commented 2 months ago

Excellent to hear.

Let's leave this ticket open until I implement this feature as a configurable option.

Deniz-Eren commented 2 months ago

@phicore Actually a question for you; would the default behaviour for QNX CAN-bus drivers be the non-echo behaviour?

It seems in Linux the default is to echo. When I implement this configuration, I'm thinking of making the default non-echo based on your experiences with other QNX CAN-bus drivers; let me know what you think.

phicore commented 2 months ago

@phicore Actually a question for you; would the default behaviour for QNX CAN-bus drivers be the non-echo behaviour?

I only ever used QNX on the iMX8QM platform, and the CAN interface is used in RAW mode only, but yes, I suppose it is non-echo by default on QNX.

That being said, internal loop-back exists in the devctl_info structure: https://www.qnx.com/developers/docs/7.1/#com.qnx.doc.neutrino.devctl/topic/can/can_devctl_get_info.html)

Deniz-Eren commented 2 months ago

Yes I will update those loop back info messages accordingly also.

Deniz-Eren / dev-can-linux

Internal loopback feature (Linux IFF_ECHO) should be made configurable (related to #56) #57

Describe the bug

To Reproduce

Platform

Driver

Code