espressif / esp-idf

Espressif IoT Development Framework. Official development framework for Espressif SoCs.
Apache License 2.0
13.66k stars 7.29k forks source link

保存音频 (IDFGH-9926) #11221

Open JuneSheng11 opened 1 year ago

JuneSheng11 commented 1 year ago

Is your feature request related to a problem?

使用双mic进行录音,希望可以保存双通道的原始音频,目前测试结果只是保存了单通道,如何才能保存双通道和回声消除中的参考信号的原始音频。

Describe the solution you'd like.

No response

Describe alternatives you've considered.

No response

Additional context.

No response

jason-mao commented 1 year ago

@JuneSheng11 您使用的什么环境(IDF version 或其他的软件版本)?哪个example? 硬件codec或者开发板情况?

JuneSheng11 commented 1 year ago

IDF 4.4.4 版本;esp-skainet 中的 voice_communication 示例;开发板是 ESP32-S3-Korvo-2。

BlueSkyB commented 1 year ago

voice_communication 中获取原始音频的地方是在 feed_Task 中:从 I2S 获取的是四通道音频,经过 esp_get_feed_data 接口后,会被过滤成三通道音频。

JuneSheng11 commented 1 year ago

你好,这是我修改的代码,但是录制的音频很奇怪,存在以下情况:

  1. 保存的原始音频并不是双通道的;
  2. 录制的音频采样率不是16K;

我现在需要保存双麦原始音频,以及音频前端处理算法后的结果,特别是波束形成后的结果。我已将代码附上,您能帮忙修改一下吗?

/* This example code is in the Public Domain (or CC0 licensed, at your option.)

Unless required by applicable law or agreed to in writing, this software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */

include

include

include

include "freertos/FreeRTOS.h"

include "freertos/task.h"

include "esp_wn_iface.h"

include "esp_wn_models.h"

include "dl_lib_coefgetter_if.h"

// #include "esp_afe_sr_iface.h"

include "esp_afe_sr_models.h"

include "esp_mn_iface.h"

include "esp_mn_models.h"

include "esp_board_init.h"

include "driver/i2s.h"

include "model_path.h"

include "ringbuf.h"

define DEBUG_SAVE_PCM 1 // 0

if DEBUG_SAVE_PCM

define FILES_MAX 3

ringbuf_handle_t rb_debug[FILES_MAX] = {NULL}; FILE * file_save[FILES_MAX] = {NULL};

endif

int detect_flag = 0; static esp_afe_sr_iface_t afe_handle = NULL; static esp_afe_sr_data_t afe_data = NULL; static volatile int task_flag = 0; //0

/**
 * Feed task: reads interleaved 16-bit PCM from the board and passes it to the AFE.
 *
 * When DEBUG_SAVE_PCM is enabled, every chunk that was fed is also queued in
 * rb_debug[0] so that debug_pcm_save_Task can write it to a file.
 *
 * @param arg  AFE instance (esp_afe_sr_data_t *) created in app_main.
 */
void feed_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data = arg;
    int audio_chunksize = afe_handle->get_feed_chunksize(afe_data);
    int nch = afe_handle->get_total_channel_num(afe_data);
    int feed_channel = esp_get_feed_channel();
    assert(nch <= feed_channel);

    /* One chunk holds audio_chunksize samples for each of the feed_channel channels. */
    int feed_bytes = audio_chunksize * (int)sizeof(int16_t) * feed_channel;
    int16_t *i2s_buff = malloc(feed_bytes);
    assert(i2s_buff);
    printf("channel %d\n", nch);

    while (task_flag) {
        /* Author's note from the original code: audio_chunksize 256, feed_channel 4. */
        esp_get_feed_data(i2s_buff, feed_bytes);

        /* NOTE(review): the AFE is configured for nch channels while the buffer
         * holds feed_channel channels; confirm the two match for this board. */
        afe_handle->feed(afe_data, i2s_buff);

#if DEBUG_SAVE_PCM
        if (rb_debug[0] != NULL) {
            /* Save the whole chunk. Using nch here would cut every chunk short
             * and break the channel interleave whenever nch < feed_channel. */
            if (rb_bytes_available(rb_debug[0]) < feed_bytes) {
                printf("ERROR! rb_debug[0] slow!!!\n");
            }

            rb_write(rb_debug[0], i2s_buff, feed_bytes, 0);
        }
#endif
    }

    free(i2s_buff);
    i2s_buff = NULL;
    vTaskDelete(NULL);
}

/**
 * Fetch task: pulls processed audio from the AFE.
 *
 * When DEBUG_SAVE_PCM is enabled, each successfully fetched chunk is queued in
 * rb_debug[1] so that debug_pcm_save_Task can write it to a file.
 *
 * @param arg  AFE instance (esp_afe_sr_data_t *) created in app_main.
 */
void detect_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data = arg;
    int afe_chunksize = afe_handle->get_fetch_chunksize(afe_data);

    /* One fetched chunk: afe_chunksize 16-bit samples, a single channel. */
    int fetch_bytes = afe_chunksize * (int)sizeof(int16_t);
    int16_t *buff = malloc(fetch_bytes);
    assert(buff);
    printf("------------detect start------------\n");

    while (task_flag) {
        afe_fetch_result_t *res = afe_handle->fetch(afe_data);
        if (res && res->ret_value != ESP_FAIL) {
            memcpy(buff, res->data, fetch_bytes);

#if DEBUG_SAVE_PCM
            if (rb_debug[1] != NULL) {
                if (rb_bytes_available(rb_debug[1]) < fetch_bytes) {
                    printf("ERROR! rb_debug[1] slow!!!\n");
                }

                rb_write(rb_debug[1], buff, fetch_bytes, 0);
            }
#endif
        }
    }

    free(buff);
    buff = NULL;
    vTaskDelete(NULL);
}

#if DEBUG_SAVE_PCM

/**
 * Debug writer task: drains each rb_debug[i] ring buffer into file_save[i].
 *
 * Data is moved in fixed blocks of `size` bytes; a slot is serviced only when
 * its ring buffer holds more than one full block.
 *
 * @param arg  Unused.
 */
void debug_pcm_save_Task(void *arg)
{
    (void)arg;

    /* 2 channels * 2 bytes * 32 ms * 16 samples per ms = 2048 bytes per block.
     * (The original note described the 4-channel variant: 4 * 2 * 32 * 16 = 4k bytes.) */
    const int size = 2 * 2 * 32 * 16;
    int16_t *buf_temp = heap_caps_calloc(1, size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
    if (buf_temp == NULL) {
        printf("debug_pcm_save_Task: can not allocate %d bytes\n", size);
        vTaskDelete(NULL);
        return;
    }

    /* A 1 ms delay truncates to 0 ticks when the tick period is longer than
     * 1 ms; always block for at least one tick so the loop cannot spin. */
    TickType_t idle_ticks = pdMS_TO_TICKS(1);
    if (idle_ticks == 0) {
        idle_ticks = 1;
    }

    while (task_flag) {
        for (int i = 0; i < FILES_MAX; i++) {
            /* Skip slots that were never opened or have no ring buffer. */
            if (file_save[i] == NULL || rb_debug[i] == NULL) {
                continue;
            }
            if (rb_bytes_filled(rb_debug[i]) <= size) {
                continue;
            }

            int ret = rb_read(rb_debug[i], buf_temp, size, pdMS_TO_TICKS(3000));
            if (ret < size) {
                /* Error (negative) or short read: back off and try the next slot. */
                vTaskDelay(pdMS_TO_TICKS(10));
                continue;
            }
            FatfsComboWrite(buf_temp, size, 1, file_save[i]);
        }
        vTaskDelay(idle_ticks);
    }

    free(buf_temp);
    vTaskDelete(NULL);
}

#endif

void app_main() {

ESP_ERROR_CHECK(esp_board_init(AUDIO_HAL_08K_SAMPLES, 1, 16));

if DEBUG_SAVE_PCM

ESP_ERROR_CHECK(esp_sdcard_init("/sdcard", 10));  //10

endif

afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_VC_HANDLE;
afe_config_t afe_config = AFE_CONFIG_DEFAULT();
afe_config.vad_init = false;
afe_config.wakenet_init = false;
afe_config.voice_communication_init = true;

afe_config.se_init = true;  // 配置是否使能 BSS/NS
afe_config.voice_communication_agc_init = false;// 配置是否使能语音通话中 AGC
afe_config.voice_communication_agc_gain = 15; // 配置 AGC 的增益值(单位为 dB)
// 配置 AFE 工作模式(SR_MODE_LOW_COST 或 SR_MODE_HIGH_PERF)
afe_config.afe_mode = SR_MODE_HIGH_PERF;
afe_config.agc_mode = AFE_MN_PEAK_AGC_MODE_2;// 配置音频线性放大 Level
afe_config.aec_init = false;
afe_config.pcm_config.total_ch_num = 2;
afe_config.pcm_config.mic_num = 2;
afe_config.pcm_config.ref_num = 0;

if defined CONFIG_ESP32_S3_BOX_BOARD || defined CONFIG_ESP32_S3_EYE_BOARD

afe_config.aec_init = false;

#if defined CONFIG_ESP32_S3_EYE_BOARD
    printf("init \n");
    afe_config.pcm_config.total_ch_num = 2;
    afe_config.pcm_config.mic_num = 2;
    afe_config.pcm_config.ref_num = 0;
#endif

endif

afe_data = afe_handle->create_from_config(&afe_config);
if (afe_data == NULL) {
    printf("create_from_config fail!\n");
    return;
}

if DEBUG_SAVE_PCM

// rb_debug[0] = rb_create(afe_handle->get_total_channel_num(afe_data) 4 16000 2, 1); // 4s ringbuf rb_debug[0] = rb_create(afe_handle->get_total_channel_num(afe_data) 4 16000 2, 1); // 4s ringbuf file_save[0] = fopen("/sdcard/feed_1.pcm", "w"); if (file_save[0] == NULL) printf("can not open file\n");

rb_debug[1] = rb_create(afe_handle->get_total_channel_num(afe_data) * 4 * 16000 * 2, 1);   // 4s ringbuf
file_save[1] = fopen("/sdcard/feed_2.pcm", "w");
if (file_save[1] == NULL) printf("can not open file\n");

rb_debug[2] = rb_create(1 * 4 * 16000 * 2, 1);   // 4s ringbuf
file_save[2] = fopen("/sdcard/fetch6.pcm", "w");
if (file_save[2] == NULL) printf("can not open file\n");

xTaskCreatePinnedToCore(&debug_pcm_save_Task, "debug_pcm_save", 2 * 1024, NULL, 5, NULL, 1);

endif

task_flag = 1;
xTaskCreatePinnedToCore(&feed_Task, "feed", 8 * 1024, (void*)afe_data, 5, NULL, 0);
xTaskCreatePinnedToCore(&detect_Task, "detect", 8 * 1024, (void*)afe_data, 5, NULL, 1);

}

BlueSkyB commented 1 month ago

voice_communication 并不经过 BSS,speech recognition 那边,当双麦时,才会经过 BSS。 所以需要将 afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_VC_HANDLE; 改为: afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;

同时将音频通道改为: afe_config.pcm_config.total_ch_num = 3; afe_config.pcm_config.mic_num = 2; afe_config.pcm_config.ref_num = 1;

因为 skainet 中默认的 Korvo-2 板子驱动获得的音频是带有回采(参考)信号的,这里的配置需要与实际音频相匹配。如果使用自己的音频驱动,配置同样需要与喂入 AFE 的实际音频情况相匹配。fetch 获取的音频是算法处理后的单通道音频;要获取原始音频,请保存喂给 AFE 的音频。