espressif / ESP8266_RTOS_SDK

Latest ESP8266 SDK based on FreeRTOS, esp-idf style.
http://bbs.espressif.com
Apache License 2.0
3.34k stars 1.57k forks source link

hw_timer frc1 with reload mode false and CLKDIV_256 causes WDT (GIT8266O-664) #1086

Open jvh003 opened 3 years ago

jvh003 commented 3 years ago

Environment

Problem Description

Trying to use the frc1 hardware timer with CLKDIV 256 in single shot mode to set a delay of 20s. My test interrupt service routine enables 2 GPIO pins for debug, and also sends a message to a message queue. Changed my test code to use a 1s delay for testing.

As a small (minor) thing I also noticed while try to work on this: The comment on line 27 in timer_struct.h is in-correct. The FRC1 is a 23 bit timer, not 24 as stated in the include file.

/* ESP8266 FRC Register Definitions */
// FRC1 is a 24-bit countdown timer, triggers interrupt when reaches zero.

Expected Behavior

After 1s the interrupt service routine should fire, turn on the 2 GPIO pins, and send my test msg to the queue. See function hw_timer_callback() below. This timer should work in either reload true or false mode.

Actual Behavior

This timer only works properly in reload true mode. If the reload mode is false:

Steps to repropduce

Load the code and run it. The code was modified from the example code found here: hw_timer_example_main.c However, since I wanted to code up a larger delay ie: 20s, I modified the example to explicitly call all the hw_timer functions that the function hw_timer_alarm_us() was calling internally. (see below).

Code to reproduce this issue

#include <stdio.h>
#include <string.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "driver/gpio.h"
#include "esp_log.h"
#include "esp_system.h"
#include "esp_spi_flash.h"
#include "appMain.h"
#include "LedControl.h"
#include "driver/hw_timer.h"

#define LED_ON_BOARD_BLUE   GPIO_NUM_2   // D4 - Esp12e onboard blue led
#define LEFT_LED            GPIO_NUM_0   // D3 - Left red Led
#define RIGHT_LED           GPIO_NUM_15  // D8 - Right red Led
#define GPIO_OUTPUT_IO_0    LEFT_LED
#define GPIO_OUTPUT_IO_1    RIGHT_LED

static const char TAG[] = "main";
static QueueHandle_t MsgQueue = NULL;

void initPin(gpio_num_t pin)
{
   gpio_config_t config;
   memset((void*)&config, 0, sizeof(config));
   config.intr_type = GPIO_INTR_DISABLE;
   config.mode = GPIO_MODE_OUTPUT;
   config.pin_bit_mask = (1UL << pin);
   config.pull_down_en = GPIO_PULLDOWN_DISABLE;
   config.pull_up_en = GPIO_PULLUP_DISABLE;
   gpio_config(&config);
   gpio_set_level(pin, 0);
}

void ledsOn()
{
   gpio_set_level(LED_ON_BOARD_BLUE, 0);  // active low
   gpio_set_level(LEFT_LED, 1);
   gpio_set_level(RIGHT_LED, 1);
}

void ledsOff()
{
   gpio_set_level(LED_ON_BOARD_BLUE, 1);  // active low
   gpio_set_level(LEFT_LED, 0);
   gpio_set_level(RIGHT_LED, 0);
}

void hw_timer_callback(void *arg)
{
    gpio_set_level(GPIO_OUTPUT_IO_0, 1);
    xQueueSendFromISR(MsgQueue, &arg, NULL);
    gpio_set_level(GPIO_OUTPUT_IO_1, 1);
}

void msgTask(void *arg)
{
   void *pMsg;
   uint32_t value;
   printf("msgTask() start\n");
   while (true)
   {
      xQueueReceive(MsgQueue, &pMsg, portMAX_DELAY);
      value = *((uint32_t *)pMsg);
      ESP_LOGW(TAG, "msg rec'd %u from %u", value, (uint32_t)pMsg);
      ledsOff();
   }
}

void app_main()
{
   /* Print chip information */
   esp_chip_info_t chip_info;
   esp_chip_info(&chip_info);
   printf("This is ESP8266 chip with %d CPU cores, WiFi, ", chip_info.cores);
   printf("silicon revision %d, ", chip_info.revision);
   printf("%dMB %s flash\n", spi_flash_get_chip_size() / (1024 * 1024),
          (chip_info.features & CHIP_FEATURE_EMB_FLASH) ? "embedded" : "external");

   initPin(LED_ON_BOARD_BLUE);
   initPin(LEFT_LED);
   initPin(RIGHT_LED);
   ledsOff();

   MsgQueue = xQueueCreate(10, sizeof(void *));
   if (MsgQueue == NULL)
   {
      ESP_LOGE(TAG, "Failed to create queue");
   }
   xTaskCreate(&msgTask, "MsgTask", 4096, NULL, 5, NULL);

   int delay = 10; //seconds
   uint32_t arg = 30;

   for (int i=0; i<20; i++)
   {
#if 0
      ESP_LOGI(TAG, "Initialize hw_timer for callback");
      hw_timer_init(hw_timer_callback, &arg);
      ESP_LOGI(TAG, "Set hw_timer timing time 1000ms with reload: %d", TEST_RELOAD);
      hw_timer_alarm_us(1000000, TEST_RELOAD);
#else

#if 0
      // this code block works fine with reload true or false
      uint32_t value = 1000000; // 1s
      uint32_t counter = ((TIMER_BASE_CLK >> TIMER_CLKDIV_16) / 1000000) * value; // value units are micro-seconds
      ESP_LOGI(TAG, "Initialize hw_timer for callback, value %u, counter %u (0x%x)", value, counter, counter);
      hw_timer_init(hw_timer_callback, &arg);
      hw_timer_set_reload(true); // false - also works
      hw_timer_set_clkdiv(TIMER_CLKDIV_16);
      hw_timer_set_intr_type(TIMER_EDGE_INT);
      hw_timer_set_load_data(counter);
      ESP_LOGI(TAG, "Retrieved counter %u", hw_timer_get_load_data());
      hw_timer_enable(true);
#else
      // this code block works only if reload is true, it hangs and WDT if reload is false
      uint32_t value = 1;  // 1s
      uint32_t counter = (TIMER_BASE_CLK >> TIMER_CLKDIV_256) * value;  // units seconds
      ESP_LOGI(TAG, "Initialize hw_timer for callback, value %u, counter %u (0x%x)", value, counter, counter);
      hw_timer_init(hw_timer_callback, &arg);
      hw_timer_set_reload(false);  // true is ok, false hangs and causes WDT
      hw_timer_set_clkdiv(TIMER_CLKDIV_256);
      hw_timer_set_intr_type(TIMER_EDGE_INT);
      hw_timer_set_load_data(counter);
      ESP_LOGI(TAG, "Retrieved counter %u", hw_timer_get_load_data());
      hw_timer_enable(true);
#endif
#endif

      printf("Delay %ds\n", delay);
      vTaskDelay((delay * 1000)/portTICK_PERIOD_MS);
      printf("Disarm the timer\n");
      hw_timer_disarm();
      hw_timer_deinit();
      ledsOff();
   }

   printf("Restarting now.\n");
   fflush(stdout);
   esp_restart();
}

Debug Logs

Logs for the code running the timer in reload true mode. This is the expected output.

I (59) boot: ESP-IDF v3.4-rc-dirty 2nd stage bootloader
I (60) boot: compile time 17:56:03
I (68) qio_mode: Enabling default flash chip QIO
I (69) boot: SPI Speed      : 40MHz
I (74) boot: SPI Mode       : QIO
I (80) boot: SPI Flash Size : 2MB
I (86) boot: Partition Table:
I (91) boot: ## Label            Usage          Type ST Offset   Length
I (103) boot:  0 nvs              WiFi data        01 02 00009000 00006000
I (114) boot:  1 phy_init         RF data          01 01 0000f000 00001000
I (126) boot:  2 factory          factory app      00 00 00010000 000f0000
I (137) boot: End of partition table
I (144) esp_image: segment 0: paddr=0x00010010 vaddr=0x40210010 size=0x1c1b0 (115120) map
0x40210010: _stext at ??:?

I (196) esp_image: segment 1: paddr=0x0002c1c8 vaddr=0x4022c1c0 size=0x072cc ( 29388) map
I (207) esp_image: segment 2: paddr=0x0003349c vaddr=0x3ffe8000 size=0x00554 (  1364) load
I (208) esp_image: segment 3: paddr=0x000339f8 vaddr=0x40100000 size=0x00080 (   128) load
I (221) esp_image: segment 4: paddr=0x00033a80 vaddr=0x40100080 size=0x04fac ( 20396) load
I (241) boot: Loaded app from partition at offset 0x10000
This is ESP8266 chip with 1 CPU cores, WiFi, silicon revision 1, 2MB external flash
I (256) gpio: GPIO[2]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
I (264) gpio: GPIO[0]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
I (278) gpio: GPIO[15]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
msgTask() start
I (295) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (307) main: Retrieved counter 312500
Delay 10s
W (1315) main: msg rec'd 30 from 1073654908
W (2315) main: msg rec'd 30 from 1073654908
W (3315) main: msg rec'd 30 from 1073654908
W (4315) main: msg rec'd 30 from 1073654908
W (5315) main: msg rec'd 30 from 1073654908
W (6315) main: msg rec'd 30 from 1073654908
W (7315) main: msg rec'd 30 from 1073654908
W (8315) main: msg rec'd 30 from 1073654908
W (9315) main: msg rec'd 30 from 1073654908
W (10316) main: msg rec'd 30 from 1073654908
Disarm the timer
I (10317) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (10323) main: Retrieved counter 312500
Delay 10s
W (11336) main: msg rec'd 30 from 1073654908
W (12336) main: msg rec'd 30 from 1073654908
W (13336) main: msg rec'd 30 from 1073654908
W (14336) main: msg rec'd 30 from 1073654908
W (15336) main: msg rec'd 30 from 1073654908
W (16336) main: msg rec'd 30 from 1073654908
W (17336) main: msg rec'd 30 from 1073654908
W (18336) main: msg rec'd 30 from 1073654908
W (19336) main: msg rec'd 30 from 1073654908
Disarm the timer

These are the logs for the code running in reload false mode with the WDT:

I (59) boot: ESP-IDF v3.4-12-g8b6329de-dirty 2nd stage bootloader
I (60) boot: compile time 17:11:26
I (68) qio_mode: Enabling default flash chip QIO
I (69) boot: SPI Speed      : 40MHz
I (75) boot: SPI Mode       : QIO
I (81) boot: SPI Flash Size : 2MB
I (87) boot: Partition Table:
I (93) boot: ## Label            Usage          Type ST Offset   Length
I (104) boot:  0 nvs              WiFi data        01 02 00009000 00006000
I (116) boot:  1 phy_init         RF data          01 01 0000f000 00001000
I (127) boot:  2 factory          factory app      00 00 00010000 000f0000
I (139) boot: End of partition table
I (145) esp_image: segment 0: paddr=0x00010010 vaddr=0x40210010 size=0x1c1c4 (115140) map
0x40210010: _stext at ??:?

I (197) esp_image: segment 1: paddr=0x0002c1dc vaddr=0x4022c1d4 size=0x072e4 ( 29412) map
I (208) esp_image: segment 2: paddr=0x000334c8 vaddr=0x3ffe8000 size=0x00554 (  1364) load
I (209) esp_image: segment 3: paddr=0x00033a24 vaddr=0x40100000 size=0x00080 (   128) load
I (223) esp_image: segment 4: paddr=0x00033aac vaddr=0x40100080 size=0x04fac ( 20396) load
I (242) boot: Loaded app from partition at offset 0x10000
This is ESP8266 chip with 1 CPU cores, WiFi, silicon revision 1, 2MB external flash
I (257) gpio: GPIO[2]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
I (265) gpio: GPIO[0]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
I (279) gpio: GPIO[15]| InputEn: 0| OutputEn: 1| OpenDrain: 0| Pullup: 0| Pulldown: 0| Intr:0
msgTask() start
I (296) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (308) main: Retrieved counter 312500
Delay 10s
Task watchdog got triggered.

Guru Meditation Error: Core  0 panic'ed (IllegalInstruction). Exception was unhandled.
Core 0 register dump:
PC      : 0x4010050e  PS      : 0x00000030  A0      : 0x4010050e  A1      : 0x3ffecde0
0x4010050e: PendSV at D:/development/esp32-dev/ESP8266_RTOS_SDK/components/freertos/port/esp8266/port.c:123
0x4010050e: PendSV at D:/development/esp32-dev/ESP8266_RTOS_SDK/components/freertos/port/esp8266/port.c:123

A2      : 0x00000020  A3      : 0x00000000  A4      : 0x4010555c  A5      : 0x3ffeba9c
A6      : 0xffffffff  A7      : 0x0000000a  A8      : 0x00000000  A9      : 0x00000001
A10     : 0x4022e0f6  A11     : 0x0000000a  A12     : 0x3ffebde0  A13     : 0x00000001
A14     : 0x3ffece30  A15     : 0x00000000  SAR     : 0x00000000  EXCCAUSE: 0x00000000

Backtrace: 0x4010050e:0x3ffecde0 0x402167c4:0x3ffecdf0 0x40216ed7:0x3ffece30
0x4010050e: PendSV at D:/development/esp32-dev/ESP8266_RTOS_SDK/components/freertos/port/esp8266/port.c:123
0x402167c4: xQueueReceive at D:/development/esp32-dev/ESP8266_RTOS_SDK/components/freertos/freertos/queue.c:1345
0x40216ed7: msgTask(void*) at D:/development/eclipse-workspace/esp12-hwtimer-test/main/appMain.cpp:68

Other items if possible

Let me know if you really need/want the sdkconfig or ELF file.

dsptech commented 3 years ago

I removed a my wrong comment. Sorry for the inconvenient.

jvh003 commented 3 years ago

No problem, Do you have any suggestions? I would even offer to fix this, but I am not too familiar with the internals of this OS. But I worked on VxWorks RTOS for many years at Nortel. Is there any docs that you could link to me that could help me figure out how to debug this? Btw, I know this design pattern, ie: sending a msg from the ISR, works great on my Esp32/idf.

Jerry

On Apr 27, 2021, at 12:58 PM, dsptech @.***> wrote:



I removed a my wrong comment. Sorry.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHubhttps://github.com/espressif/ESP8266_RTOS_SDK/issues/1086#issuecomment-827761908, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACTU53A7AFBXM6CF4XFLFF3TK3ULJANCNFSM43RWZOWA.

dsptech commented 3 years ago

I'm interested to this issue so I've already tested the frc1 timer using the same conditions (using with a very simple handler) ... and I seen that the timer work properly. I also found also a potential issue: It seems that the control register definition of the timer1 does not match the counterpart of the Arduino sdk. These info are not disclosed, so I cannot point out the right one. I think that this can potentially affect the level type interrupt only.

Regarding your issue, the debug log you produced suggest that your msgTask is looping with no waits for an unknown reason (and this task starve the idle task, so the watchdog clear cannot occur).

I suggest to test the timer only as first pass (and to strip out all rtos related), for example:

volatile uint32_t cbCount = 0;
void hw_timer_callback(void *arg)
{
    cbCount++;
}
void app_main()
{
   <initialize timer here>
  for(;;){
       printf("count:%u\n", cbCount );
       vTaskDelay(pdMS_TO_TICKS(1000));
  }
}

P.S.: Search for documentation related to this SDK is .... a very big deal. The link https://docs.espressif.com/projects/esp8266-rtos-sdk/en/latest/ is incomplete, so I use multiple sources as the ESP32 docs (you can find here some debug info), ESP8266 arduino docs, the Espressif forum, the freeRTOS website and other resources over internet, but in my case the most useful tool is Eclipse that allow to find everything in the SDK source code and inspect inside that. You can also try the GDB stub (search for the espressif repository) but I never used it.

jvh003 commented 3 years ago

Good idea. That test reveals some very interesting information. With hw_timer_set_reload(true) and a 10s delay - everything works great. The cbCount is 9 or 10, which is correct. (see working log below).

However, with hw_timer_set_reload(false) and a 10s delay - the cbCount value is 2900484 for example. (see the not working log below).

So I guess this is why the message queue code blows up. The ISR is firing too much and overflowing the queue. But the real question is why is the ISR firing so often? Did you see this behavour?

Working Log:

msgTask() start
I (532) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (544) main: Retrieved counter 312500
Delay 10s, cbCount 0
Disarm the timer
cbCount 9
I (10548) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (10550) main: Retrieved counter 312500
Delay 10s, cbCount 0
Disarm the timer
cbCount 9
I (20549) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (20551) main: Retrieved counter 312500
Delay 10s, cbCount 0
Disarm the timer
cbCount 9

Not working Log:

msgTask() start
I (296) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (308) main: Retrieved counter 312500
Delay 10s, cbCount 0
Disarm the timer
cbCount 2900484
I (10599) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (10601) main: Retrieved counter 312500
Delay 10s, cbCount 162
Disarm the timer
cbCount 2901347
I (20890) main: Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4)
I (20892) main: Retrieved counter 312500
Delay 10s, cbCount 183
Disarm the timer
cbCount 2901369
dsptech commented 3 years ago

Strange, this work to me instead. 1) Can you copy and paste your "no reload" test code here? (please remove all non-timer related code. Post a code simple as possible). I will copy and paste it in my develop machine. 2) Is your SDK updated ? 3) Have you tested on another device ?

dsptech commented 3 years ago

Hi, I tested again the timer ... and it appear to work fine. This is the code I used:

volatile uint32_t cbCount = 0;
IRAM_ATTR void hw_timer_callback(void *arg)
{
    cbCount++;
}

void app_main() {
    uint32_t value = 1;  // 1s
    uint32_t counter = (TIMER_BASE_CLK >> TIMER_CLKDIV_256) * value;
    hw_timer_init(hw_timer_callback, NULL);
    hw_timer_set_reload(false);
    hw_timer_set_clkdiv(TIMER_CLKDIV_256);
    hw_timer_set_intr_type(TIMER_EDGE_INT);
    hw_timer_set_load_data(counter);
    ESP_LOGI(TAG, "Retrieved counter %u", hw_timer_get_load_data());
    hw_timer_enable(true);
    for(;;){
         ESP_LOGI(TAG,"count:%u", cbCount );
         vTaskDelay(pdMS_TO_TICKS(10000));
    }
}

This is the output:

I (280) MAIN2: Retrieved counter 312500
I (283) MAIN2: count:0
I (10283) MAIN2: count:1
I (20283) MAIN2: count:1
I (30283) MAIN2: count:1
I (40283) MAIN2: count:1
dsptech commented 3 years ago

STOP ALL!!

The issue is clock frequency related. Previous tests are performed at 80Mhz. I placed a esp_set_cpu_freq(ESP_CPU_FREQ_160M) just after "app_main()" and this is the new obtained log:

I (281) MAIN2: Retrieved counter 312500
I (282) MAIN2: count:0
I (11091) MAIN2: count:2811024
I (30127) MAIN2: count:8751503
I (49163) MAIN2: count:14690721
dsptech commented 3 years ago

It appear like a timing issue of the hw_timer_isr_cb() function (see hw_timer.c) or a buggy hardware. If I delay a bit the disabling of the timer (example: move it after the callback) the system appear to work but more testing is required.

As fast workaround, I placed the code below at the bottom of the your callback function (this wipe out the timer configuration):

    if (!frc1.ctrl.reload) {
        frc1.ctrl.val=0;
        soc_clear_int_mask(1ul << ETS_FRC_TIMER1_INUM); //required
    }  

Note: for a fast configuration, you can save the frc1.ctrl.val value immediately after the timer activation and write it to restore the full configuration and activate the timer again.

Hi @donghengqaz, sorry to bother you. Have you some related info ? Regards.

jvh003 commented 3 years ago

@dsptech I believe that I have found the problem. Give me some more time, as I'm testing a fix and will get back to you. Thanks.

jvh003 commented 3 years ago

@dsptech finally have time to report back. I thought there was a problem in the layout of the ctrl register in the timer_struct.h header as the code doesn't match the documentation in the ESP8266 Technical Reference Manual, V1.7, 2020.10.21, Appendix 4. for Timer Registers. So I modified the timer_struct.h like this:

    union {
        struct {
            uint32_t intr_type:    1;  // bit 0       RW
            uint32_t reserved1:    1;  // bit 1
            uint32_t div:          2;  // bits 3-2    RW  0-divBy1, 1-divBy16, 2or3-divBy256
            uint32_t reserved2:    2;  // bits 5-4
            uint32_t reload:       1;  // bit 6       RW
            uint32_t en:           1;  // bit 7       RW
            uint32_t status:       1;  // bit 8       RO
            uint32_t reserved23:  23;  // bits 31-9
        };
        uint32_t val;
    } ctrl;

I also made some small changes in the hw_timer.c to account for this in the hw_timer_set_clkdiv() and hw_timer_get_clkdiv(). But it doesn't help. It makes no difference at all in fact.

So back to some other questions you had earlier:

static volatile uint32_t cbCount = 0; IRAM_ATTR void hw_timer_callback(void *arg) { cbCount++; }

void app_main() { for (;;) { uint32_t value = 1; // 1s uint32_t counter = (TIMER_BASE_CLK >> TIMER_CLKDIV_256) * value; // units seconds printf("Initialize hw_timer for callback, value %u, counter %u (0x%x)\n", value, counter, counter); hw_timer_init(hw_timer_callback, NULL); hw_timer_set_reload(false); // true is ok, false NOT ok hw_timer_set_clkdiv(TIMER_CLKDIV_256); hw_timer_set_intr_type(TIMER_EDGE_INT); hw_timer_set_load_data(counter); printf("Retrieved counter %u, ctrlReg 0x%x\n", hw_timer_get_count_data(), frc1.ctrl.val); hw_timer_enable(true); printf("CountReg %u, cbCount %u, ctrlReg 0x%x\n", frc1.count.data, cbCount, frc1.ctrl.val);

  vTaskDelay(10000/portTICK_PERIOD_MS);
  printf("Disarm the timer\n");
  hw_timer_disarm();
  hw_timer_deinit();
  printf("cbCount %u\nDelay 10s\n\n", cbCount);
  vTaskDelay(10000/portTICK_PERIOD_MS);
  cbCount = 0;

} }

And log output for reload(false) case:

Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4) Retrieved counter 312500, ctrlReg 0x8 CountReg 312499, cbCount 0, ctrlReg 0x88 Disarm the timer cbCount 2594002 Delay 10s

Initialize hw_timer for callback, value 1, counter 312500 (0x4c4b4) Retrieved counter 312500, ctrlReg 0x8 CountReg 312499, cbCount 109, ctrlReg 0x88 Disarm the timer cbCount 2591605 Delay 10s

dsptech commented 3 years ago

Hi, the mismatched configuration involve the "intr_type" bit position. However this bit the always set to 0 at each configuration of "div" so the effect is that the timer is always forced to work in edge interrupt mode, regardless the hw_timer_set_intr_type() call.

In my previous tests, I found that the issue it is likely timing related (or there is a hardware bug inside the timer). If the timer is disabled too soon, it does not work correctly anymore. I suppose that we have to wait -prescaler value- ticks of the peripherals clock (80Mhz)

For example, I changed the function hw_timer_isr_cb() with the following function

static void IRAM_ATTR hw_timer_isr_cb(void* arg)
{
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
    if (!frc1.ctrl.reload) {
        frc1.ctrl.en = 0;
    }
}

(swap of the two "if") and now the timer appear to work. I don't know the true reason.

jvh003 commented 3 years ago

@dsptech - I tried that change on my NodemcuV2 board, and it's not working for me. I still see the cbCounter incrementing to a large numbers (like: 2595076). :-( I also tried on my Huzzah board - also not working. Any other ideas? ( I can work around this in my project, but it would be nice to understand and fix this ).

dsptech commented 3 years ago
I just tested again the behavior of the patched hw_timer_isr_cb. It work for me. This may be due a different sdk configuration.

Just for a try, could you test the behavior of this ?

static void IRAM_ATTR hw_timer_isr_cb(void* arg)
{
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
    if (!frc1.ctrl.reload) {
        while (frc1.count.val==0);
        frc1.ctrl.en = 0;
        soc_clear_int_mask(1ul << ETS_FRC_TIMER1_INUM);
    }
}
dsptech commented 3 years ago

This patch also seems to work:

static void IRAM_ATTR hw_timer_isr_cb(void* arg)
{
    if (!frc1.ctrl.reload) {
        frc1.count.data = 0x7FFFFF;
        frc1.ctrl.en = 0;
    }
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
}
jvh003 commented 3 years ago

The 1st patch works on my board. This while loop seems to do the trick. With out that part, it fails.

while (frc1.count.val==0);

The 2nd patch does not work on my board.

dsptech commented 3 years ago

I need to investigate again about the 2nd patch.

my thoughts: 1) the enable/disable bit does not fully disable the timer but it just stop the countdown. 2) prescaler = 1, 16, or 256 with cpu @80Mhz : the counter has already underflow to 0x7FFFFF BEFORE the call of the handler. 3) prescaler = 256 (cpu @160Mhz): the counter still have zero value at the time of the call, so the disabling of the timer force the counter to zero definitely. However, the internal comparator continue to work and an interrupt is fired at each timer tick (rate: 80Mhz/256)

dsptech commented 3 years ago

P.S.: you can strip strip out "soc_clear_int_mask(1ul << ETS_FRC_TIMER1_INUM);" from the first patch.

dsptech commented 3 years ago

Hi, I confirm that the 2nd patch cannot work (the counter is read only). It work (unintentionally) in my sdk due an active debug aid that induce some additional latency in the interrupts mananging. I totally forgotten it. Now I should be able to fully reproduce the issue.

I found no other way than the first patch (with the constraint of not altering the timer configuration), but I worked out the patch a bit. This new patch will speed up the timer instead of waiting for its natural underflow:

static void IRAM_ATTR hw_timer_isr_cb(void* arg)
{
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
    if (!frc1.ctrl.reload) {
        /* this prevent an interrupt flood if the timer is disabled with frc1.count.val == 0 */
        if(frc1.count.val == 0){
            uint8_t divSet = frc1.ctrl.div; //expected TIMER_CLKDIV_256
            frc1.ctrl.div = TIMER_CLKDIV_1; //rush the timer and cause immediate underflow
            frc1.ctrl.div = divSet; //assume at least 2Tcy from the previous write
        }
        frc1.ctrl.en = 0;
    }

}
jvh003 commented 3 years ago

Hi @dsptech this patch 3 also works properly for me on my board - nice! Agree it's better than using the while loop. But it's not clear to me if it's safe to check that frc1.count.val will be zero. If my ISR CB function takes too long, the counter could be a different value every time, and this save & re-write of the .div will not fix the problem. What am I missing?

dsptech commented 3 years ago

Hi, the interrupt shot occur when the counter (countdown) reach the value:0.

If we disable the timer, but the counter still has zero value, the above issue will occur so a preventive correction is required. This is expected only for prescaler = 256 with very fast user callback functions.

If enough time is elapsed (slow callback functions) an underflow is already happened and the counter already has the value = 0x7FFFFF (or slightly lower due the elapsed time). In this case no correction is needed. This is also the normally expected condition.

Note: The 3rd patch is fully operative but it has a big code footprint due the concurrent volatile accesses and bit fields managements. I already written a more lightweight patch and actually I'm testing it. I need to be sure about the write timings. I hope to post it in few days.

dsptech commented 3 years ago

Hi @jvh003, I was thinking about the patch and I realized that it can break the compatibility in some condition (e.g: restart timer from the callback). I have to revert to the original scheme (disable timer as first thing).

This is now the optimized patch I actually testing:

static void IRAM_ATTR hw_timer_isr_cb(void *arg)
{
    typeof ( ((frc1_struct_t*)0)->ctrl) ctrl, ctrl2; 
    ctrl.val = frc1.ctrl.val;
    if (!ctrl.reload) {
        /* this prevent an interrupt flood if the timer is disabled with frc1.count.val == 0 */
        if (frc1.count.val == 0) {
            ctrl2.val = ctrl.val;
            ctrl2.div = TIMER_CLKDIV_1;
            frc1.ctrl.val = ctrl2.val;       //rush the timer and cause immediate underflow
        }
        ctrl.en = 0;
        frc1.ctrl.val = ctrl.val; //disable and restore original div. Assume at least 2Tcy from the previous write
    }
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
}

In this version, the "control" section has an half sized footprint (inspection on disassembly) against the previous patch due the removing of some not required access to volatile bitfields. I think that this is final. Can you try this version ?

jvh003 commented 3 years ago

hi @dsptech. I tried this latest patch4 on both my esp12 boards and it works on both.

dsptech commented 3 years ago

Hi @jvh003, I checked the patch against some real situations and I discovered another flaw of the driver, and of the patch too, due this: https://github.com/espressif/ESP8266_RTOS_SDK/issues/626#issue-455083710

The above issue means that:

  1. frc1 is able to underflow many times unexpectly before the call of hw_timer_isr_cb () and we cannot assume anything about the current value of the counter. This is regardless of the current prescaler setting.
  2. frc1 is also able to fire again just before the disabling of the timer. This will set the related interrupt bit (not suppressed by _xt_isr_handler() ) and cause a double call to the user callback.
  3. it exists also a probability that "if (frc1.count.val == 0)" of the patch will fail in its purpose.

Moreover, the NMI (pwm driver) can shot anytime causing further issues about the timing.

For these reasons a different approach is required.

(Hopefully this is the latest patch to test) Note: this is only for avoiding blocking situations. There is nothing we can do about interrupt latencies.

static void IRAM_ATTR hw_timer_isr_cb(void *arg)
{
    typeof ( ((frc1_struct_t*)0)->ctrl) ctrl;
    ctrl.val = frc1.ctrl.val;
    if (!ctrl.reload) {
        ctrl.en = 0;
        frc1.ctrl.val = ctrl.val; //stop counter
        /* frc1.count.val = 0 causes continuous fire even with the timer disabled. 
         * So, an immediate reload is forced to avoid that (assuming that the current load value is valid).
         */ 
        uint32_t val = frc1.load.val; //split to suppress a warning
        frc1.load.val= val;
        soc_clear_int_mask(1ul << (ETS_FRC_TIMER1_INUM));
    }
    if (hw_timer_obj->cb != NULL) {
        hw_timer_obj->cb(arg);
    }
}
jvh003 commented 3 years ago

Hi @dsptech. Yes, this is sort-of-what I was trying to get at before when I asked the question about the: if (frc1.count.val == 0) check. Anyway, glad to see you tested it and found some other problems and then figured it out. I have tested this new patch5 on both my boards and it works!

dsptech commented 3 years ago

Hi @jvh003 I'm very sorry, I misunderstood your previous considerations about "frc1.count.val == 0".

I'm still testing the patch but I will remain a bit concerned that the workaround is due an undocumented hardware "bug" and works due undocumented hardware "features". So, the right solution could be another one and I'm not sure if it is appropriate to post a pull request.

dsptech commented 3 years ago

Hi @jvh003, I had no further issues with the last patch, so I sent the PR above.

The PR does not include the fix about the register layout, but I already fixed it (as you already done too) and a pair of functions were added. The layout fix work correctly with the edge interrupt mode, but I have no time now to perform a full test about the level interrupt mode (no users have been able to use / test it before due to the "original bug" ).

Any suggestion is appreciated, so if you wish to give a look you can find the work here: https://github.com/dsptech/ESP8266_RTOS_SDK/tree/fix_frc1_plus_register_layout Thank you. Regards.

jvh003 commented 3 years ago

Hi @dsptech. I looked the PR. Looks good to me. Thanks for you work on this problem!