espressif / esp-adf

Espressif Audio Development Framework
Other
1.56k stars 690 forks source link

Crash when trying to playback M4A files, works fine with mp3 (AUD-5771) #1295

Closed nathan-swidget closed 1 month ago

nathan-swidget commented 1 month ago

Hey,

I seem to be unable to play back M4A audio files. I have tried samples provided by Espressif, as well as samples created with Audacity.

#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_mem.h"
#include "audio_common.h"
#include "i2s_stream.h"
#include "mp3_decoder.h"
#include "aac_decoder.h"
#include "board.h"
#include <string.h>

// Build-time switch: 1 selects the embedded M4A test file (and, in app_main,
// the AAC decoder); 0 selects the embedded MP3 test file (and the MP3 decoder).
#define USE_M4A 1
// audio_start_asm / audio_end_asm are bound via asm labels to the
// _binary_test_*_start / _binary_test_*_end symbols, so the same two names
// refer to whichever test file was selected above.
// NOTE(review): these symbols are presumably produced by embedding the file
// in the firmware image at build time -- confirm against the build files.
#if USE_M4A
    extern const uint8_t audio_start_asm[] asm("_binary_test_m4a_start");
    extern const uint8_t audio_end_asm[] asm("_binary_test_m4a_end");
#else
    extern const uint8_t audio_start_asm[] asm("_binary_test_mp3_start");
    extern const uint8_t audio_end_asm[] asm("_binary_test_mp3_end");
#endif  // USE_M4A

// Number of bytes of the embedded file already handed out by
// read_audio_from_flash; it is only ever increased there and never reset.
static int bytesConsumed = 0;
/**
 * Read callback that streams the embedded audio file sequentially.
 *
 * Copies up to `len` bytes, starting at the current read offset
 * (`bytesConsumed`), from the embedded file into `buf` and advances the
 * offset by the number of bytes copied.
 *
 * The `el`, `wait_time` and `ctx` arguments are not used. The callback can
 * only move forward through the data; it has no way to seek.
 *
 * @param buf  Destination buffer; must have room for `len` bytes.
 * @param len  Maximum number of bytes to copy.
 * @return Number of bytes copied, or AEL_IO_DONE once the whole file has
 *         been consumed.
 */
extern "C" audio_element_err_t read_audio_from_flash(audio_element_handle_t el, char *buf, int len, TickType_t wait_time, void *ctx) {
    const uint8_t *cursor = audio_start_asm + bytesConsumed;
    const size_t bytes_left = (size_t)(audio_end_asm - cursor);

    int chunk = (int)bytes_left;
    if (chunk == 0) {
        return AEL_IO_DONE;
    }
    if (len < chunk) {
        // Caller asked for less than what is left: hand out only `len` bytes.
        chunk = len;
    }

    memcpy(buf, cursor, chunk);
    bytesConsumed += chunk;
    return (audio_element_err_t)chunk;
}

// Builds and returns, by value, the i2s_stream_cfg_t used for the I2S writer
// element in app_main.
// NOTE(review): the function name says "I2C" but the returned type is
// i2s_stream_cfg_t, i.e. this is an I2S stream configuration.
// The initialiser relies on designated-field order and on the
// SOC_I2S_HW_VERSION_1 preprocessor branch below; keep the field order intact
// when editing.
static i2s_stream_cfg_t getDefaultI2CConfig() {
    i2s_stream_cfg_t cfg{
        // Stream is used as a writer (output side of the pipeline).
        .type = AUDIO_STREAM_WRITER,
        .transmit_mode = I2S_COMM_MODE_STD,
        .chan_cfg = {
            .id = I2S_NUM_0,
            .role = I2S_ROLE_MASTER,
            .dma_desc_num = 3,
            .dma_frame_num = 312,
            .auto_clear = true
        },
        .std_cfg = {
            // Initial clock setup for 16000 Hz; app_main later calls
            // i2s_stream_set_clk on the created element as well.
            .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(16000),
            .slot_cfg = {
                .data_bit_width = I2S_DATA_BIT_WIDTH_16BIT,
                .slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO,
                .slot_mode = I2S_SLOT_MODE_MONO,
                .slot_mask = I2S_STD_SLOT_RIGHT,
                // NOTE(review): ws_width is given a data-bit-width enumerator;
                // presumably its numeric value (16) is what is intended -- confirm.
                .ws_width = I2S_DATA_BIT_WIDTH_16BIT,
                .ws_pol = false,
                .bit_shift = true,
                // The remaining slot fields differ depending on
                // SOC_I2S_HW_VERSION_1, hence the conditional section.
                #if SOC_I2S_HW_VERSION_1
                .msb_right = true,
                #else
                .left_align = true,
                .big_endian = false,
                .bit_order_lsb = false
                #endif
            },
            .gpio_cfg = { // this is loaded from custom board info
                .invert_flags = {
                    .mclk_inv = false,
                    .bclk_inv = false,
                }
            }
        },
        .use_alc = true,
        .volume = 100,
        // The task/buffer settings below use the I2S_STREAM_* constants
        // supplied by the included headers.
        .out_rb_size = I2S_STREAM_RINGBUFFER_SIZE,                                 
        .task_stack = I2S_STREAM_TASK_STACK,                                       
        .task_core = I2S_STREAM_TASK_CORE,                                         
        .task_prio = I2S_STREAM_TASK_PRIO,                                         
        .stack_in_ext = false,                                                     
        .multi_out_num = 0,                                                        
        .uninstall_drv = true,                                                     
        .need_expand = false,                                                      
        .buffer_len = I2S_STREAM_BUF_SIZE                                     
    };

    return cfg;
}

/**
 * Application entry point: plays the embedded audio file once.
 *
 * Sets up the board codec, builds a two-element pipeline
 * (decoder -> I2S writer), feeds the decoder from flash through
 * read_audio_from_flash, and then waits on the event interface until the
 * pipeline reports that it stopped or finished. All pipeline resources,
 * including the event interface and the pipeline itself, are released before
 * returning.
 */
void app_main() {
    audio_board_handle_t board_handle = audio_board_init();
    audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_DECODE, AUDIO_HAL_CTRL_START);

    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
    audio_pipeline_handle_t pipeline = audio_pipeline_init(&pipeline_cfg);

    i2s_stream_cfg_t i2s_cfg = getDefaultI2CConfig();
    audio_element_handle_t i2sWriter = i2s_stream_init(&i2s_cfg);
    i2s_stream_set_clk(i2sWriter, 16000, 16, 1);

    // The decoder type follows the same USE_M4A switch that selects the
    // embedded file.
    audio_element_handle_t decoder;
    #if USE_M4A
    aac_decoder_cfg_t aac_dec_cfg  = DEFAULT_AAC_DECODER_CONFIG();
    decoder = aac_decoder_init(&aac_dec_cfg);
    #else
    mp3_decoder_cfg_t mp3_cfg = DEFAULT_MP3_DECODER_CONFIG();
    decoder = mp3_decoder_init(&mp3_cfg);
    #endif  // USE_M4A

    // The decoder has no upstream element; it pulls its input straight from
    // the embedded file through this callback.
    audio_element_set_read_cb(decoder, read_audio_from_flash, NULL);

    const char *link_tag[2] = {"dec",  "i2s"};

    audio_pipeline_register(pipeline, decoder, "dec");
    audio_pipeline_register(pipeline, i2sWriter, "i2s");

    audio_pipeline_link(pipeline, &link_tag[0], 2);
    audio_event_iface_cfg_t evt_cfg = AUDIO_EVENT_IFACE_DEFAULT_CFG();
    audio_event_iface_handle_t eventHandle = audio_event_iface_init(&evt_cfg);
    audio_pipeline_set_listener(pipeline, eventHandle);
    audio_pipeline_run(pipeline);

    while (1) {
        audio_event_iface_msg_t msg;
        esp_err_t ret = audio_event_iface_listen(eventHandle, &msg, portMAX_DELAY);
        if (ret != ESP_OK) {
            continue;
        }
        if (msg.cmd == AEL_MSG_CMD_STOP) {
            break;
        }
        if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.cmd == AEL_MSG_CMD_REPORT_MUSIC_INFO) {
            // An element reported the stream format: reconfigure the I2S
            // clock to match the reported rate / bit depth / channel count.
            audio_element_info_t music_info = {0};
            audio_element_getinfo((audio_element_handle_t)msg.source, &music_info);
            i2s_stream_set_clk(i2sWriter, music_info.sample_rates, music_info.bits, music_info.channels);
            continue;
        }

        if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.source == (void *) i2sWriter
            && msg.cmd == AEL_MSG_CMD_REPORT_STATUS) {
            // Playback is over once the I2S writer reports finished/stopped.
            if ((int)msg.data == AEL_STATUS_STATE_FINISHED || (int)msg.data == AEL_STATUS_STATE_STOPPED) {
                break;
            }
        }
    }

    audio_pipeline_stop(pipeline);
    audio_pipeline_wait_for_stop(pipeline);
    audio_pipeline_unlink(pipeline);

    audio_pipeline_unregister(pipeline, i2sWriter);
    audio_element_deinit(i2sWriter);

    audio_pipeline_unregister(pipeline, decoder);
    audio_element_deinit(decoder);

    // Detach the listener before destroying the event interface it refers to.
    audio_pipeline_remove_listener(pipeline);
    audio_event_iface_destroy(eventHandle);

    // Both elements were unregistered and deinitialised above, so this only
    // releases the pipeline object itself.
    audio_pipeline_deinit(pipeline);
}

The above code works for playing MP3 files, but not M4A files (selected by changing the `USE_M4A` define).


D (1802) intr_alloc: Connected src 79 to int 3 (cpu 0)
D (1807) app_start: Starting scheduler on CPU0
D (1811) intr_alloc: Connected src 57 to int 5 (cpu 0)
D (1811) intr_alloc: Connected src 80 to int 0 (cpu 1)
D (1816) app_start: Starting scheduler on CPU1
D (1820) intr_alloc: Connected src 58 to int 1 (cpu 1)
I (1811) main_task: Started on CPU0
D (1829) heap_init: New heap initialised at 0x3fce9710
I (1833) esp_psram: Reserving pool of 32K of internal memory for DMA/internal allocations
D (1841) esp_psram: Allocating block of size 32768 bytes
I (1846) main_task: Calling app_main()
I (1850) new_codec: new_codec init
I (1853) AUDIO_HAL: Codec mode is 2, Ctrl:1
D (1857) i2s_common: tx channel is registered on I2S0 successfully
D (1863) i2s_common: DMA malloc info: dma_desc_num = 3, dma_desc_buf_size = dma_frame_num * slot_num * data_bit_width = 624
D (1874) i2s_std: Clock division info: [sclk] 160000000 Hz [mdiv] 39 [mclk] 4096000 Hz [bdiv] 8 [bclk] 512000 Hz
D (1884) gdma: new group (0) at 0x3c1c0f8c
D (1887) gdma: new pair (0,0) at 0x3c1c1018
D (1891) gdma: new tx channel (0,0) at 0x3c1c0f54
D (1896) intr_alloc: Connected src 71 to int 8 (cpu 0)
D (1900) gdma: install interrupt service for tx channel (0,0)
D (1906) i2s_std: The tx channel on I2S0 has been initialized to STD mode successfully
D (1913) i2s_common: i2s tx channel enabled
D (1917) i2s_common: i2s tx channel disabled
D (1921) i2s_std: Clock division info: [sclk] 160000000 Hz [mdiv] 39 [mclk] 4096000 Hz [bdiv] 8 [bclk] 512000 Hz
D (1931) i2s_common: i2s tx channel enabled
I (1936) AUDIO_PIPELINE: link el->rb, el:0x3c1c10bc, tag:dec, rb:0x3c1c1218
D (1942) AUDIO_PIPELINE: FUNC:audio_pipeline_link, LINE:516
D (1947) AUDIO_PIPELINE: el-list: linked:1, kept:0, el:0x3c1c10bc,              dec, in_rb:0, out_rb:0x3c1c1218
D (1957) AUDIO_PIPELINE: el-list: linked:1, kept:0, el:0x3c1c0c40,              i2s, in_rb:0x3c1c1218, out_rb:0
D (1967) AUDIO_PIPELINE: rb-list: linked:1, kept:0, rb:0x3c1c1218, host_el:0x3c1c10bc,              dec
D (1976) AUDIO_PIPELINE: start el[             dec], linked:1, state:1,[0x3c1c10bc], 
I (1983) AUDIO_THREAD: The dec task allocate stack on external memory
I (1990) AUDIO_ELEMENT: [dec-0x3c1c10bc] Element task created
D (1995) AUDIO_PIPELINE: start el[             i2s], linked:1, state:1,[0x3c1c0c40], 
I (2003) AUDIO_THREAD: The i2s task allocate stack on internal memory
I (2009) AUDIO_ELEMENT: [i2s-0x3c1c0c40] Element task created
I (2014) AUDIO_PIPELINE: Func:audio_pipeline_run, Line:359, MEM Total:2402852 Bytes, Inter:353279 Bytes, Dram:353279 Bytes, Dram largest free:zuBytes

D (2027) AUDIO_PIPELINE: resume,linked:1, state:1,[dec-0x3c1c10bc]
I (2033) AUDIO_ELEMENT: [dec] AEL_MSG_CMD_RESUME,state:1
I (2039) CODEC_ELEMENT_HELPER: The element is 0x3c1c10bc. The reserve data 2 is 0.
I (2047) AAC_DECODER: A new song playing
Guru Meditation Error: Core  0 panic'ed (LoadProhibited). Exception was unhandled.

Core  0 register dump:
PC      : 0x42010838  PS      : 0x00060730  A0      : 0x82014e6c  A1      : 0x3c1c2a70  
A2      : 0x00000000  A3      : 0x3c1c2a90  A4      : 0x00000003  A5      : 0x00000001  
A6      : 0x00000000  A7      : 0x3c1c4568  A8      : 0x82010838  A9      : 0x3c1c2a20  
A10     : 0x00000010  A11     : 0x00000000  A12     : 0x3c1c3ce8  A13     : 0x00000001  
0x42010838: audio_element_abort_output_ringbuf at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:757

A14     : 0x00000000  A15     : 0x00000000  SAR     : 0x00000004  EXCCAUSE: 0x0000001c  
EXCVADDR: 0x00000020  LBEG    : 0x400570e8  LEND    : 0x400570f3  LCOUNT  : 0x00000000  

Backtrace: 0x42010835:0x3c1c2a70 0x42014e69:0x3c1c2a90 0x42014fbb:0x3c1c2ac0 0x4201590f:0x3c1c2b70 0x42012dd8:0x3c1c2bb0 0x42013045:0x3c1c2c50 0x42010344:0x3c1c2c70 0x420108b3:0x3c1c2ca0 0x42010a21:0x3c1c2cc0 0x420118fd:0x3c1c2cf0 0x42010aff:0x3c1c2d30
0x400570e8: memset in ROM
0x400570f3: memset in ROM

0x42010835: audio_element_abort_output_ringbuf at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:756
0x42014e69: mp4_parser_seek at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:762
0x42014fbb: mp4_parser_info_init at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:817
0x4201590f: mp4_parser_parse_header at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:1307
0x42012dd8: aac_decoder_open at /builds/adf/esp-adf-libs-source/esp_processing/esp-wrapper/aac_decoder.c:324
0x42013045: _aac_decoder_open at /builds/adf/esp-adf-libs-source/esp_processing/esp-wrapper/aac_decoder.c:798
0x42010344: audio_element_process_init at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:175
0x420108b3: audio_element_on_cmd_resume at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:279
0x42010a21: audio_element_on_cmd at /home/spades/projects/esp32/esp-adf/components/audio
_pipeline/audio_element.c:320
0x420118fd: audio_event_iface_waiting_cmd_msg at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_event_iface.c:246
0x42010aff: audio_element_task at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:473

I'm at a loss as to what the problem is.

hbler99 commented 1 month ago

Please provide information about your board as well as ESP-IDF and ESP-ADF to help solve the problem.

nathan-swidget commented 1 month ago

The board is a custom board using the ESP-S3-PICO-1. ESP-IDF: release/v5.3 (707d097b01756687cca18be855a2675d150247ae)
ESP-ADF: v2.7 (9cf556de500019bb79f3bb84c821fda37668c052)

Here is a sample repo that can be used to recreate the issue: https://github.com/nathan-swidget/esp_audio_test

Some additional information: The AAC sample file (from https://docs.espressif.com/projects/esp-adf/en/latest/design-guide/audio-samples.html) plays as well. So, MP3 and AAC work, but M4A crashes.

Log from running sample (same result as noted in opening comment)

SPIWP:0xee
mode:DIO, clock div:1
load:0x3fce2810,len:0x10dc
load:0x403c8700,len:0x4
load:0x403c8704,len:0xc98
load:0x403cb700,len:0x2d10
entry 0x403c8904
I (31) boot: ESP-IDF v5.3-383-g0bbd728196-dirty 2nd stage bootloader
I (31) boot: compile time Sep 24 2024 10:42:07
I (31) boot: Multicore bootloader
I (33) boot: chip revision: v0.2
I (36) boot.esp32s3: Boot SPI Speed : 80MHz
I (40) boot.esp32s3: SPI Mode       : DIO
I (44) boot.esp32s3: SPI Flash Size : 8MB
I (47) boot: Enabling RNG early entropy source...
I (52) boot: Partition Table:
I (54) boot: ## Label            Usage          Type ST Offset   Length
I (61) boot:  0 nvs              WiFi data        01 02 00009000 00004000
I (67) boot:  1 otadata          OTA data         01 00 0000d000 00002000
I (74) boot:  2 phy_init         RF data          01 01 0000f000 00001000
I (80) boot:  3 ota_0            OTA app          00 10 00010000 00300000
I (87) boot:  4 ota_1            OTA app          00 11 00310000 00300000
I (93) boot:  5 factory_nvs      WiFi data        01 02 00610000 00004000
I (100) boot:  6 nvs_keys         NVS keys         01 04 00614000 00001000
I (106) boot:  7 storage          Unknown data     01 82 00615000 001eb000
I (113) boot: End of partition table
I (116) esp_image: segment 0: paddr=00010020 vaddr=3c040020 size=587b0h (362416) map
I (189) esp_image: segment 1: paddr=000687d8 vaddr=3fc94f00 size=02c4ch ( 11340) load
I (192) esp_image: segment 2: paddr=0006b42c vaddr=40374000 size=04bech ( 19436) load
I (197) esp_image: segment 3: paddr=00070020 vaddr=42000020 size=3d6c4h (251588) map
I (245) esp_image: segment 4: paddr=000ad6ec vaddr=40378bec size=0c2bch ( 49852) load
I (263) boot: Loaded app from partition at offset 0x10000
I (264) boot: Disabling RNG early entropy source...
I (274) esp_psram: Found 2MB PSRAM device
I (274) esp_psram: Speed: 40MHz
I (274) cpu_start: Multicore app
I (694) esp_psram: SPI SRAM memory test OK
I (703) cpu_start: Pro cpu start user code
I (703) cpu_start: cpu freq: 160000000 Hz
I (703) app_init: Application information:
I (706) app_init: Project name:     audio_tst
I (711) app_init: App version:      3d7e0f7-dirty
I (716) app_init: Compile time:     Oct 17 2024 12:12:27
I (722) app_init: ELF file SHA256:  84d227049...
I (727) app_init: ESP-IDF:          v5.3-783-g707d097b01-dirty
I (734) efuse_init: Min chip rev:     v0.0
I (739) efuse_init: Max chip rev:     v0.99 
I (743) efuse_init: Chip rev:         v0.2
I (748) heap_init: Initializing. RAM available for dynamic allocation:
I (756) heap_init: At 3FC985F8 len 00051118 (324 KiB): RAM
I (762) heap_init: At 3FCE9710 len 00005724 (21 KiB): RAM
I (768) heap_init: At 3FCF0000 len 00008000 (32 KiB): DRAM
I (774) heap_init: At 600FE100 len 00001EE8 (7 KiB): RTCRAM
I (780) esp_psram: Adding pool of 2048K of PSRAM memory to heap allocator
I (789) spi_flash: detected chip: gd
I (792) spi_flash: flash io: dio
I (796) sleep: Configure to isolate all GPIO pins in sleep state
I (803) sleep: Enable automatic switching of GPIO sleep configuration
I (810) main_task: Started on CPU0
I (831) esp_psram: Reserving pool of 32K of internal memory for DMA/internal allocations
I (832) main_task: Calling app_main()
I (833) new_codec: new_codec init
I (837) AUDIO_HAL: Codec mode is 2, Ctrl:1
I (844) AUDIO_PIPELINE: link el->rb, el:0x3c0a0fc0, tag:dec, rb:0x3c0a111c
I (850) AUDIO_THREAD: The dec task allocate stack on external memory
I (857) AUDIO_ELEMENT: [dec-0x3c0a0fc0] Element task created
I (863) AUDIO_THREAD: The i2s task allocate stack on internal memory
I (870) AUDIO_ELEMENT: [i2s-0x3c0a0c40] Element task created
I (876) AUDIO_PIPELINE: Func:audio_pipeline_run, Line:359, MEM Total:2418272 Bytes, Inter:368447 Bytes, Dram:368447 Bytes, Dram largest free:270336Bytes

I (891) AUDIO_ELEMENT: [dec] AEL_MSG_CMD_RESUME,state:1
I (897) CODEC_ELEMENT_HELPER: The element is 0x3c0a0fc0. The reserve data 2 is 0x0.
I (905) AAC_DECODER: A new song playing
Guru Meditation Error: Core  0 panic'ed (LoadProhibited). Exception was unhandled.

Core  0 register dump:
PC      : 0x4200c2d2  PS      : 0x00060f30  A0      : 0x8201089c  A1      : 0x3c0a2950  
0x4200c2d2: audio_element_abort_output_ringbuf at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:760

A2      : 0x00000000  A3      : 0x3c0a2970  A4      : 0x00000003  A5      : 0x00000001  
A6      : 0x00000000  A7      : 0x3c0a446c  A8      : 0x00000001  A9      : 0x00000000  
A10     : 0x00000001  A11     : 0x00000000  A12     : 0x3c0a3bec  A13     : 0x00000001  
A14     : 0x00000000  A15     : 0x3fc98d8c  SAR     : 0x00000001  EXCCAUSE: 0x0000001c  
EXCVADDR: 0x00000020  LBEG    : 0x400570e8  LEND    : 0x400570f3  LCOUNT  : 0x00000000  
0x400570e8: memset in ROM
0x400570f3: memset in ROM

Backtrace: 0x4200c2cf:0x3c0a2950 0x42010899:0x3c0a2970 0x420109eb:0x3c0a29a0 0x4201133f:0x3c0a2a50 0x4200e808:0x3c0a2a90 0x4200ea75:0x3c0a2b30 0x4200bde3:0x3c0a2b50 0x4200c354:0x3c0a2b80 0x4200c462:0x3c0a2ba0 0x4200d349:0x3c0a2bd0 0x4200c67b:0x3c0a2c10 0x4037c72d:0x3c0a2c40

0x4200c2cf: audio_element_abort_output_ringbuf at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:759
0x42010899: mp4_parser_seek at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:762
0x420109eb: mp4_parser_info_init at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:817
0x4201133f: mp4_parser_parse_header at /builds/adf/esp-adf-libs-source/esp_processing/esp-share/mp4_parser.c:1307
0x4200e808: aac_decoder_open at /builds/adf/esp-adf-libs-source/esp_processing/esp-wrapper/aac_decoder.c:324
0x4200ea75: _aac_decoder_open at /builds/adf/esp-adf-libs-source/esp_processing/esp-wrapper/aac_decoder.c:798
0x4200bde3: audio_element_process_init at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:175
0x4200c354: audio_element_on_cmd_resume at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:283
0x4200c462: audio_element_on_cmd at /home/spades/projects/esp32/esp-adf/components/audio
ELF file SHA256: 84d227049
_pipeline/audio_element.c:324
0x4200d349: audio_event_iface_waiting_cmd_msg at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_event_iface.c:246
0x4200c67b: audio_element_task at /home/spades/projects/esp32/esp-adf/components/audio_pipeline/audio_element.c:477
0x4037c72d: vPortTaskWrapper at /home/spades/projects/esp32/esp-idf/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c:134

Rebooting...
ESP-ROM:esp32s3-20210327
Build:Mar 27 2021
rst:0xc (RTC_SW_CPU_RST),boot:0x8 (SPI_FAST_FLASH_BOOT)
Saved PC:0x40375b08
0x40375b08: esp_restart_noos at /home/spades/projects/esp32/esp-idf/components/esp_system/port/soc/esp32s3/system_internal.c:158
TempoTian commented 1 month ago

This M4A file has the MDAT box before the MOOV box; to support such files, the input element must support seeking. (image)

I have modified your code to make it work; you can try using it instead. Please rename it first. audio_main c