OpenEtherCATsociety / SOEM

Simple Open Source EtherCAT Master
Other
1.35k stars 685 forks source link

Advices about the code written so far with Xenomai and DC #368

Closed Davidinos closed 4 years ago

Davidinos commented 4 years ago

Goodmorning, I write you to have your opinion about the code reported below. I'm using a Beaglebone black (1GHz single core) with Xenomai Cobalt patch and DC enabled at 1ms. Do you find anything wrong? I ask you because with our tests we see that sometimes it loses the sync.. Besides I wonder if I can improve the jitter In attachment the wireshark result with a cycle time 1ms. 1msCycleTime.zip

Thank you.

`/** \file

include

include

include

include <sys/time.h>

include

include

include

include

include

include

include

include

include <sys/mman.h>

include <alchemy/task.h>

include <alchemy/timer.h>

include <alchemy/sem.h>

include <boilerplate/trace.h>

include <xenomai/init.h>

//#include "ethercat.h"

include "includeSOEM/ethercat.h"

define NSEC_PER_SEC 1000000000

define EC_TIMEOUTMON 500

struct sched_param schedp; char IOmap[4096]; RT_TASK thread1, thread2; RT_SEM sem_desc; struct timeval tv, t1, t2; int dorun = 0; int deltat, tmax = 0; int64 toff, gl_delta; int DCdiff; int os; uint8 ob; uint16 ob2; uint8 *digout = 0; int expectedWKC; boolean needlf; volatile int wkc; boolean inOP; uint8 currentgroup = 0;

int64 maxDelta = 0;

define SYNC0_SIGNAL_PERIOD_NS 1000000

define stack64k (64 * 1024)

define SEM_INIT 0 / Initial semaphore count /

define SEM_MODE S_FIFO / Wait by FIFO order /

define ETHCAT_CHECK_LOOP 10000000 //Expressed in ticks

/*****


static int slave_dc_config(uint16 slave) { ec_dcsync0(slave, TRUE, SYNC0_SIGNAL_PERIOD_NS, 10); return 0; }

void catch_signal(int sig) { dorun=0; usleep(5e5); rt_task_delete(&thread1); rt_task_delete(&thread2); exit(1); }

/*****


void redtest(char ifname, char ifname2) { int cnt, i, j, oloop, iloop;

printf("Starting Redundant test\n");

/ initialise SOEM, bind socket to ifname / // if (ec_init_redundant(ifname, ifname2)) if (ec_init(ifname)) { printf("ec_init on %s succeeded.\n",ifname); / find and auto-config slaves / if ( ec_config_init(FALSE) > 0 ) { / configure DC options for every DC capable slave found in the list / ec_configdc();

     for(cnt = 1; cnt <= ec_slavecount ; cnt++)
     {
         if(ec_slave[cnt].hasdc > 0)
         {
             ec_slave[cnt].PO2SOconfig = slave_dc_config;
         }
     }

     ec_config_map(&IOmap);

     /* read indevidual slave state and store in ec_slave[] */
     ec_readstate();
     for(cnt = 1; cnt <= ec_slavecount ; cnt++)
     {
        printf("Slave:%d Name:%s Output size:%3dbits Input size:%3dbits State:%2d delay:%d.%d\n",
              cnt, ec_slave[cnt].name, ec_slave[cnt].Obits, ec_slave[cnt].Ibits,
              ec_slave[cnt].state, (int)ec_slave[cnt].pdelay, ec_slave[cnt].hasdc);
        printf("         Out:%8.8x,%4d In:%8.8x,%4d\n",
              (int)ec_slave[cnt].outputs, ec_slave[cnt].Obytes, (int)ec_slave[cnt].inputs, ec_slave[cnt].Ibytes);
        /* check for EL2004 or EL2008 */
        if( !digout && ((ec_slave[cnt].eep_id == 0x0af83052) || (ec_slave[cnt].eep_id == 0x07d83052)))
        {
           digout = ec_slave[cnt].outputs;
        }
     }
     expectedWKC = (ec_group[0].outputsWKC * 2) + ec_group[0].inputsWKC;
     printf("Calculated workcounter %d\n", expectedWKC);

     printf("Request operational state for all slaves\n");
     ec_slave[0].state = EC_STATE_OPERATIONAL;
     /* request OP state for all slaves */
     ec_writestate(0);
     /* activate cyclic process data */
     dorun = 1;

     /* Release the semaphore after reaching EC_STATE_OPERATIONAL. */
     rt_sem_v(&sem_desc);

     /* wait for all slaves to reach OP state */
     ec_statecheck(0, EC_STATE_OPERATIONAL,  5 * EC_TIMEOUTSTATE);
     oloop = ec_slave[0].Obytes;
     if ((oloop == 0) && (ec_slave[0].Obits > 0)) oloop = 1;
     if (oloop > 8) oloop = 8;
     iloop = ec_slave[0].Ibytes;
     if ((iloop == 0) && (ec_slave[0].Ibits > 0)) iloop = 1;
     if (iloop > 8) iloop = 8;
     if (ec_slave[0].state == EC_STATE_OPERATIONAL )
     {
        printf("Operational state reached for all slaves.\n");
        inOP = TRUE;
        /* acyclic loop 5000 x 20ms = 10s */
        for(i = 1; i <= 5000; i++)
        {
           printf("Processdata cycle %5d , Wck %3d, DCtime %12lld, dt %12lld, O:",
              dorun, wkc , ec_DCtime, gl_delta);
           for(j = 0 ; j < oloop; j++)
           {
              printf(" %2.2x", *(ec_slave[0].outputs + j));
           }
           printf(" I:");
           for(j = 0 ; j < iloop; j++)
           {
              printf(" %2.2x", *(ec_slave[0].inputs + j));
           }
           printf("\r");
           fflush(stdout);
           osal_usleep(20000);
        }
        dorun = 0;
        inOP = FALSE;

        printf("Max delta: %12lld\n", maxDelta);
        fflush(stdout);
     }
     else
     {
        printf("Not all slaves reached operational state.\n");
         ec_readstate();
         for(i = 1; i<=ec_slavecount ; i++)
         {
             if(ec_slave[i].state != EC_STATE_OPERATIONAL)
             {
                 printf("Slave %d State=0x%2.2x StatusCode=0x%4.4x : %s\n",
                     i, ec_slave[i].state, ec_slave[i].ALstatuscode, ec_ALstatuscode2string(ec_slave[i].ALstatuscode));
             }
         }
     }
     printf("Request safe operational state for all slaves\n");
     ec_slave[0].state = EC_STATE_SAFE_OP;
     /* request SAFE_OP state for all slaves */
     ec_writestate(0);
  }
  else
  {
     printf("No slaves found!\n");
  }
  printf("End redundant test, close socket\n");
  /* stop SOEM, close socket */
  ec_close();

} else { printf("No socket connection on %s\nExcecute as root\n",ifname); } }

/ PI calculation to get linux time synced to DC time / void ec_sync(int64 reftime, int64 cycletime , int64 offsettime) { static int64 integral = 0; int64 delta; / set linux sync point 50us later than DC sync, just as example / delta = (reftime - 50000) % cycletime; if(delta> (cycletime / 2)) { delta= delta - cycletime; } if(delta>0){ integral++; } if(delta<0){ integral--; } offsettime = -(delta / 100) - (integral / 20); gl_delta = delta; // Max delta found maxDelta = (maxDelta > gl_delta) ? maxDelta : gl_delta; }

/*****


void ecatthread(void *ptr) { RTIME cycle_ns; RTIME cur_time, cur_cycle_cnt, cycle_time, remain_time; RTIME dc_remain_time, rt_ts;

toff = 0; dorun = 0; maxDelta = 0;

/ Now, wait for a semaphore unit... / rt_sem_p(&sem_desc, TM_INFINITE);

ec_send_processdata();

cycle_ns = (int)ptr 1000; // cycletime in ns cur_time = rt_timer_read(); // get current master time cur_cycle_cnt = cur_time / cycle_ns; // calcualte number of cycles has passed cycle_time = cur_cycle_cnt cycle_ns; remain_time = cur_time % cycle_ns; // remain time to next cycle, test only

rt_printf("cycle_cnt=%lld\n", cur_cycle_cnt); rt_printf("remain_time=%lld\n", remain_time);

wkc = ec_receive_processdata(EC_TIMEOUTRET); // get reference DC time

dc_remain_time = ec_DCtime % cycle_ns; rt_ts = cycle_time + dc_remain_time;

rt_printf("dc remain_time=%lld\n", dc_remain_time); rt_task_sleep_until(rt_ts);

while(1) { rt_ts += (RTIME) (cycle_ns + toff); rt_task_sleep_until(rt_ts);

   if (dorun>0)
   {
        wkc = ec_receive_processdata(EC_TIMEOUTRET);

        dorun++;
        // if we have some digital output, cycle
        if( digout ) *digout = (uint8) ((dorun / 16) & 0xff);

        if (ec_slave[0].hasdc)
        {
            // calulate toff to get linux time and DC synced
            ec_sync(ec_DCtime, cycle_ns, &toff);
        }
        ec_send_processdata();
   }

} }

/*****


void ecatcheck( void *ptr ) { int slave;

rt_task_set_periodic(NULL, TM_NOW, ETHCAT_CHECK_LOOP);
//RTIME tstart = rt_timer_read();

while(1)
{
    //rt_printf("Loop time: %.5f ms\n", (rt_timer_read() - tstart)/1000000.0);
    if( inOP && ((wkc < expectedWKC) || ec_group[currentgroup].docheckstate))
    {
        if (needlf)
        {
           needlf = FALSE;
           rt_printf("\n");
        }
        // one ore more slaves are not responding
        ec_group[currentgroup].docheckstate = FALSE;
        ec_readstate();
        for (slave = 1; slave <= ec_slavecount; slave++)
        {
           if ((ec_slave[slave].group == currentgroup) && (ec_slave[slave].state != EC_STATE_OPERATIONAL))
           {
              ec_group[currentgroup].docheckstate = TRUE;
              if (ec_slave[slave].state == (EC_STATE_SAFE_OP + EC_STATE_ERROR))
              {
                  rt_printf("ERROR : slave %d is in SAFE_OP + ERROR, attempting ack.\n", slave);
                 ec_slave[slave].state = (EC_STATE_SAFE_OP + EC_STATE_ACK);
                 ec_writestate(slave);
              }
              else if(ec_slave[slave].state == EC_STATE_SAFE_OP)
              {
                  rt_printf("WARNING : slave %d is in SAFE_OP, change to OPERATIONAL.\n", slave);
                 ec_slave[slave].state = EC_STATE_OPERATIONAL;
                 ec_writestate(slave);
              }
              else if(ec_slave[slave].state > EC_STATE_NONE)
              {
                 if (ec_reconfig_slave(slave, EC_TIMEOUTMON))
                 {
                    ec_slave[slave].islost = FALSE;
                    rt_printf("MESSAGE : slave %d reconfigured\n",slave);
                 }
              }
              else if(!ec_slave[slave].islost)
              {
                 // re-check state
                 ec_statecheck(slave, EC_STATE_OPERATIONAL, EC_TIMEOUTRET);
                 if (ec_slave[slave].state == EC_STATE_NONE)
                 {
                    ec_slave[slave].islost = TRUE;
                    rt_printf("ERROR : slave %d lost\n",slave);
                 }
              }
           }
           if (ec_slave[slave].islost)
           {
              if(ec_slave[slave].state == EC_STATE_NONE)
              {
                 if (ec_recover_slave(slave, EC_TIMEOUTMON))
                 {
                    ec_slave[slave].islost = FALSE;
                    rt_printf("MESSAGE : slave %d recovered\n",slave);
                 }
              }
              else
              {
                 ec_slave[slave].islost = FALSE;
                 rt_printf("MESSAGE : slave %d found\n",slave);
              }
           }
        }
        if(!ec_group[currentgroup].docheckstate)
            rt_printf("OK : all slaves resumed OPERATIONAL.\n");
    }

    // Sleep for 10ms
    rt_task_wait_period(NULL);          //rt_task_sleep(10000000);
}

}

/*****


int main(int argc, char *argv[]) { // Handle Signals to close threads signal(SIGTERM, catch_signal); signal(SIGINT, catch_signal);

// Block memory mlockall(MCL_CURRENT | MCL_FUTURE);

printf("SOEM (Simple Open EtherCAT Master)\nRedundancy test\n");

if (argc > 3) { int ctime = 0; //Create a semaphore rt_sem_create(&sem_desc, "MySemaphore", SEM_INIT, SEM_MODE);

    dorun = 0;
    ctime = atoi(argv[3]);

    // create thread to update PDOs content
    rt_task_create(&thread1, "SOEM_demo_task", stack64k * 2, 90, 0 );
    // create thread to handle slave error handling in OP
    rt_task_create(&thread2, "SOEM_check_task", stack64k * 4, 10, 0 );
    // Ask Xenomai to warn us upon switches to secondary mode.
    //rt_task_set_mode(0, T_WARNSW, NULL);
    // start cyclic rt_thread
    rt_task_start(&thread1, &ecatthread, (void*) &ctime);
    rt_task_start(&thread2, &ecatcheck, (void*) NULL);

    /* start acyclic part */
    redtest(argv[1],argv[2]);
    //printf("\nType CTRL-C to end this program\n\n" );
    //pause();

} else { printf("Usage: red_test ifname1 ifname2 cycletime\nifname = eth0 for example\ncycletime in us\n"); }

printf("End program\n");

return (0); }`

ArthurKetels commented 4 years ago

Thanks for the info. But actually you ask a xenomai question. There are examples where SOEM runs on bare metal hardware and has maximum jitter below 1us. The issue is with your hardware and OS support. No matter how clever xenomai is written, it can not do much to improve basic design flaws in hardware and drivers. Coding for real time is fundamentally different from what is available in common OS concepts.

Off my soapbox now. What you need to do is instrument your code to find the timing issues. Log your PDO thread timing in relation to the reference slave clock. When possible also log the different time paths from starting the cycle to sending data, to receiving data. The more fine grained the more informative. Store the log in histogram format, that way you can keep it in memory and still catch millions of events.

Davidinos commented 4 years ago

Hello Arthur, thank you for your answer. Your point is very clear and probably we'll move to bare metal implementation to reach our goal. I have one last question, the only modification that I applied to xenomai implementation regards the osal.c and osal.h since the time base functions are different. None other files needs to be changed right ? (consider we didn't change the nic..). Thank you for your help.

ArthurKetels commented 4 years ago

Correct. That is the whole point of osal (operating system abstraction layer). Each target need to implement the optimum OS version of the timing and threading primitives. SOEM is not critical about the specific performance of the implementation, but off course the total performance will suffer from an inefficient implementation.

Davidinos commented 4 years ago

Thank you very much Arthur.