xn67744 / unimrcp

unimrcp对接funasr
Apache License 2.0
5 stars 0 forks source link

webrtcvad问题 #1

Open najsword opened 5 months ago

najsword commented 5 months ago

一建立通道就会触发 activity 事件,要怎么解决啊?

xn67744 commented 4 months ago

核心的问题是webrtc的vad检测的问题,建议换成webrtc的vad的检测算法,mpf_activity_detector的代码,你可以参考下/*

include "mpf_activity_detector.h"

include "apt_log.h"

include "webrtc_vad.h"

/** Detector states */
typedef enum {
    DETECTOR_STATE_INACTIVITY,           /**< inactivity detected */
    DETECTOR_STATE_ACTIVITY_TRANSITION,  /**< activity detection is in-progress */
    DETECTOR_STATE_ACTIVITY,             /**< activity detected */
    DETECTOR_STATE_INACTIVITY_TRANSITION /**< inactivity detection is in-progress */
} mpf_detector_state_e;

/** Activity detector */
struct mpf_activity_detector_t {
    /* voice activity (silence) level threshold */
    apr_size_t           level_threshold;

    /* period of activity required to complete transition to active state */
    apr_size_t           speech_timeout;
    /* period of inactivity required to complete transition to inactive state */
    apr_size_t           silence_timeout;
    /* noinput timeout */
    apr_size_t           noinput_timeout;

    /* current state */
    mpf_detector_state_e state;
    /* duration spent in current state */
    apr_size_t           duration;
    /* frame duration */
    apr_size_t           frame_duration;
};

/* Debug aid: file handle for dumping raw frame data. In the code shown here it
 * is referenced only from commented-out fopen/fwrite lines. */
FILE* globalFile = NULL;

/* Create activity detector / MPF_DECLARE(mpf_activity_detector_t) mpf_activity_detector_create(apr_pool_t pool) { mpf_activity_detector_t detector = apr_palloc(pool,sizeof(mpf_activity_detector_t)); detector->level_threshold = 2; / 0 .. 255 / detector->speech_timeout = 200; / 0.2 s / detector->silence_timeout = 200; / 0.2 s / detector->noinput_timeout = 5000; / 5 s */ detector->duration = 0; detector->frame_duration = CODEC_FRAME_TIME_BASE; detector->state = DETECTOR_STATE_INACTIVITY; return detector; }

/* Reset activity detector / MPF_DECLARE(void) mpf_activity_detector_reset(mpf_activity_detector_t *detector) { detector->duration = 0; detector->state = DETECTOR_STATE_INACTIVITY; }

/* Set threshold of voice activity (silence) level / MPF_DECLARE(void) mpf_activity_detector_level_set(mpf_activity_detector_t *detector, apr_size_t level_threshold) { detector->level_threshold = level_threshold; }

/* Set noinput timeout / MPF_DECLARE(void) mpf_activity_detector_noinput_timeout_set(mpf_activity_detector_t *detector, apr_size_t noinput_timeout) { detector->noinput_timeout = noinput_timeout; }

/* Set timeout required to trigger speech (transition from inactive to active state) / MPF_DECLARE(void) mpf_activity_detector_speech_timeout_set(mpf_activity_detector_t *detector, apr_size_t speech_timeout) { detector->speech_timeout = speech_timeout; }

/* Set timeout required to trigger silence (transition from active to inactive state) / MPF_DECLARE(void) mpf_activity_detector_silence_timeout_set(mpf_activity_detector_t *detector, apr_size_t silence_timeout) { detector->silence_timeout = silence_timeout; }

/** Store the frame duration on the detector */
MPF_DECLARE(void) mpf_activity_frame_duration_set(mpf_activity_detector_t *detector, apr_size_t frame_duration)
{
    detector->frame_duration = frame_duration;
}

/** Enter a new detector state and restart the time-in-state counter */
static APR_INLINE void mpf_activity_detector_state_change(mpf_activity_detector_t *detector, mpf_detector_state_e state)
{
    detector->state = state;
    detector->duration = 0;
}

static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t frame) { apr_size_t samplesCount = frame->codec_frame.size / 2; int per_ms_frames = 10; apr_size_t sampleRate = 8000; size_t samples = sampleRate per_ms_frames / 1000; if (samples == 0) { apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,samples== 0 "); return (apr_size_t)-1; } size_t nTotal = (samplesCount / samples); int16_t input = frame->codec_frame.buffer; VadInst vadInst; if (WebRtcVad_Create(&vadInst)) { apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_Init != 0 "); return (apr_size_t)-1; } int status = WebRtcVad_Init(vadInst); if (status != 0) { WebRtcVad_Free(vadInst); apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_Init != 0 "); return (apr_size_t)-1; }

int16_t vad_mode = 1;
status = WebRtcVad_set_mode(vadInst, vad_mode);
if (status != 0) {
    WebRtcVad_Free(vadInst);
    apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_set_mode != 0 ");
    return (apr_size_t)-1;
}
size_t cnt = 0;
size_t i = 0;
if (nTotal > 0) {
    for (i = 0; i < nTotal; i++) {
        //int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        // printf("==========%d=============== \n", nVadRet);
        if (nVadRet == -1) {
            WebRtcVad_Free(vadInst);
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate,返回-1");
            return (apr_size_t)-1;
        }
        else {
            if (nVadRet >= 1) {
                cnt++;
            }
        }
        input += samples;
    }
    WebRtcVad_Free(vadInst);
    if (cnt < nTotal / 10) {
        apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回0,有效帧数量小于总帧10分之一");
        return 0;
    }
    else {
        apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回1,有效帧数量超过总帧10分之一");
        return 1;
    }
}
if (nTotal == 0) {
    int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samplesCount);
    //printf("==========%d=============== \n", nVadRet);
    //apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回%d,nTotal==0", nVadRet);
    apt_log(APT_LOG_MARK, APT_PRIO_ERROR, "VAD结果 = %d, frame大小= %d", nVadRet, frame->codec_frame.size);
    //if (globalFile == NULL) {
    //    globalFile = fopen("D://codes/workspace-c++/unimrcp/unimrcp-1.8.0/Debug/var/test.pcm", "wb");
    //} 
    //fwrite(frame->codec_frame.buffer, 1, frame->codec_frame.size, globalFile);
    WebRtcVad_Free(vadInst);
    return nVadRet;
}

return 0;

}

/**
 * Process the current frame and advance the detector state machine.
 *
 * For audio frames the speech level is obtained from
 * mpf_activity_detector_level_calculate(); other frames are handled with
 * level 0. A level of 1 or more counts as speech. The detector moves between
 * inactivity, activity-transition, activity and inactivity-transition states,
 * accumulating time in the current state and comparing it with the configured
 * speech, silence and noinput timeouts.
 *
 * NOTE(review): the time in state is advanced by CODEC_FRAME_TIME_BASE per
 * call; detector->frame_duration is not consulted here -- confirm intended.
 *
 * @param detector  the detector to update
 * @param frame     the media frame to inspect
 * @return MPF_DETECTOR_EVENT_ACTIVITY, MPF_DETECTOR_EVENT_INACTIVITY or
 *         MPF_DETECTOR_EVENT_NOINPUT when the matching timeout completes,
 *         MPF_DETECTOR_EVENT_NONE otherwise
 */
MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{
    mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
    apr_size_t level = 0;
    if ((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
        level = mpf_activity_detector_level_calculate(frame);
        /* Defensive: an error sentinel of (apr_size_t)-1 must not be mistaken for speech. */
        if (level == (apr_size_t)-1) {
            level = 0;
        }
#if 0
        apt_log(APT_LOG_MARK, APT_PRIO_INFO, "Activity Detector --------------------- [%"APR_SIZE_T_FMT"]", level);
#endif
    }

    if (detector->state == DETECTOR_STATE_INACTIVITY) {
        if (level >= 1) {
            /* start to detect activity */
            apt_log(APT_LOG_MARK,APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY_TRANSITION---------------- [%"APR_SIZE_T_FMT"]",level);
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY_TRANSITION);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if (detector->duration >= detector->noinput_timeout) {
                /* noinput timeout elapsed while still silent */
                det_event = MPF_DETECTOR_EVENT_NOINPUT;
            }
        }
    }
    else if (detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
        if (level >= 1) {
            detector->duration += CODEC_FRAME_TIME_BASE;
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY-------11111--------- [%"APR_SIZE_T_FMT"]",level);
            if (detector->duration >= detector->speech_timeout) {
                /* speech lasted long enough: report activity */
                det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY);
            }
        }
        else {
            /* speech did not last: fall back to inactivity */
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY);
        }
    }
    else if (detector->state == DETECTOR_STATE_ACTIVITY) {
        if (level >= 1) {
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY--------2222-------- [%"APR_SIZE_T_FMT"]",level);
            detector->duration += CODEC_FRAME_TIME_BASE;
        }
        else {
            /* start to detect inactivity */
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_INACTIVITY_TRANSITION---------------- [%"APR_SIZE_T_FMT"]",level);
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY_TRANSITION);
        }
    }
    else if (detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
        if (level >= 1) {
            /* speech resumed: back to activity */
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "Activity STATE duration:%"APR_SIZE_T_FMT", silence_timeout:%"APR_SIZE_T_FMT")", detector->duration, detector->silence_timeout);
            if (detector->duration >= detector->silence_timeout) {
                /* silence lasted long enough: report inactivity */
                apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_INACTIVITY---------------- [%"APR_SIZE_T_FMT"]",level);
                det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY);
            }
        }
    }
    return det_event;
}

najsword commented 4 months ago

核心的问题是webrtc的vad检测的问题,建议换成webrtc的vad的检测算法,mpf_activity_detector的代码,你可以参考下/*

 * Copyright 2008-2015 Arsen Chaloyan
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

include "mpf_activity_detector.h" #include "apt_log.h" #include "webrtc_vad.h"

/** Detector states */
typedef enum {
    DETECTOR_STATE_INACTIVITY,           /**< inactivity detected */
    DETECTOR_STATE_ACTIVITY_TRANSITION,  /**< activity detection is in-progress */
    DETECTOR_STATE_ACTIVITY,             /**< activity detected */
    DETECTOR_STATE_INACTIVITY_TRANSITION /**< inactivity detection is in-progress */
} mpf_detector_state_e;

/** Activity detector */
struct mpf_activity_detector_t {
    /* voice activity (silence) level threshold */
    apr_size_t           level_threshold;

    /* period of activity required to complete transition to active state */
    apr_size_t           speech_timeout;
    /* period of inactivity required to complete transition to inactive state */
    apr_size_t           silence_timeout;
    /* noinput timeout */
    apr_size_t           noinput_timeout;

    /* current state */
    mpf_detector_state_e state;
    /* duration spent in current state */
    apr_size_t           duration;
    /* frame duration */
    apr_size_t           frame_duration;
};

/* Debug aid: file handle for dumping raw frame data. In the code shown here it
 * is referenced only from commented-out fopen/fwrite lines. */
FILE* globalFile = NULL;

/* Create activity detector _/ MPF_DECLARE(mpf_activity_detectort) mpf_activity_detector_create(apr_pool_t pool) { mpf_activity_detector_t _detector = apr_palloc(pool,sizeof(mpf_activity_detector_t)); detector->levelthreshold = 2; / 0 .. 255 _/ detector->speechtimeout = 200; / 0.2 s _/ detector->silencetimeout = 200; / 0.2 s _/ detector->noinputtimeout = 5000; / 5 s */ detector->duration = 0; detector->frame_duration = CODEC_FRAME_TIME_BASE; detector->state = DETECTOR_STATE_INACTIVITY; return detector; }

/* Reset activity detector / MPF_DECLARE(void) mpf_activity_detector_reset(mpf_activity_detector_t *detector) { detector->duration = 0; detector->state = DETECTOR_STATE_INACTIVITY; }

/* Set threshold of voice activity (silence) level / MPF_DECLARE(void) mpf_activity_detector_level_set(mpf_activity_detector_t *detector, apr_size_t level_threshold) { detector->level_threshold = level_threshold; }

/* Set noinput timeout / MPF_DECLARE(void) mpf_activity_detector_noinput_timeout_set(mpf_activity_detector_t *detector, apr_size_t noinput_timeout) { detector->noinput_timeout = noinput_timeout; }

/* Set timeout required to trigger speech (transition from inactive to active state) / MPF_DECLARE(void) mpf_activity_detector_speech_timeout_set(mpf_activity_detector_t *detector, apr_size_t speech_timeout) { detector->speech_timeout = speech_timeout; }

/* Set timeout required to trigger silence (transition from active to inactive state) / MPF_DECLARE(void) mpf_activity_detector_silence_timeout_set(mpf_activity_detector_t *detector, apr_size_t silence_timeout) { detector->silence_timeout = silence_timeout; }

/** Store the frame duration on the detector */
MPF_DECLARE(void) mpf_activity_frame_duration_set(mpf_activity_detector_t *detector, apr_size_t frame_duration)
{
    detector->frame_duration = frame_duration;
}

/** Enter a new detector state and restart the time-in-state counter */
static APR_INLINE void mpf_activity_detector_state_change(mpf_activity_detector_t *detector, mpf_detector_state_e state)
{
    detector->state = state;
    detector->duration = 0;
}

static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t frame) { apr_size_t samplesCount = frame->codec_frame.size / 2; int per_ms_frames = 10; apr_size_t sampleRate = 8000; size_t samples = sampleRate per_ms_frames / 1000; if (samples == 0) { apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,samples== 0 "); return (apr_size_t)-1; } size_t nTotal = (samplesCount / samples); int16_t input = frame->codec_frame.buffer; VadInst vadInst; if (WebRtcVad_Create(&vadInst)) { apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_Init != 0 "); return (apr_size_t)-1; } int status = WebRtcVad_Init(vadInst); if (status != 0) { WebRtcVad_Free(vadInst); apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_Init != 0 "); return (apr_size_t)-1; }

int16_t vad_mode = 1;
status = WebRtcVad_set_mode(vadInst, vad_mode);
if (status != 0) {
    WebRtcVad_Free(vadInst);
    apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回-1,WebRtcVad_set_mode != 0 ");
    return (apr_size_t)-1;
}
size_t cnt = 0;
size_t i = 0;
if (nTotal > 0) {
    for (i = 0; i < nTotal; i++) {
        //int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        // printf("==========%d=============== \n", nVadRet);
        if (nVadRet == -1) {
            WebRtcVad_Free(vadInst);
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate,返回-1");
            return (apr_size_t)-1;
        }
        else {
            if (nVadRet >= 1) {
                cnt++;
            }
        }
        input += samples;
    }
    WebRtcVad_Free(vadInst);
    if (cnt < nTotal / 10) {
        apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回0,有效帧数量小于总帧10分之一");
        return 0;
    }
    else {
        apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回1,有效帧数量超过总帧10分之一");
        return 1;
    }
}
if (nTotal == 0) {
    int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samplesCount);
    //printf("==========%d=============== \n", nVadRet);
    //apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "level_calculate 返回%d,nTotal==0", nVadRet);
    apt_log(APT_LOG_MARK, APT_PRIO_ERROR, "VAD结果 = %d, frame大小= %d", nVadRet, frame->codec_frame.size);
    //if (globalFile == NULL) {
    //    globalFile = fopen("D://codes/workspace-c++/unimrcp/unimrcp-1.8.0/Debug/var/test.pcm", "wb");
    //} 
    //fwrite(frame->codec_frame.buffer, 1, frame->codec_frame.size, globalFile);
    WebRtcVad_Free(vadInst);
    return nVadRet;
}

return 0;

}

/**
 * Process the current frame and advance the detector state machine.
 *
 * For audio frames the speech level is obtained from
 * mpf_activity_detector_level_calculate(); other frames are handled with
 * level 0. A level of 1 or more counts as speech. The detector moves between
 * inactivity, activity-transition, activity and inactivity-transition states,
 * accumulating time in the current state and comparing it with the configured
 * speech, silence and noinput timeouts.
 *
 * NOTE(review): the time in state is advanced by CODEC_FRAME_TIME_BASE per
 * call; detector->frame_duration is not consulted here -- confirm intended.
 *
 * @param detector  the detector to update
 * @param frame     the media frame to inspect
 * @return MPF_DETECTOR_EVENT_ACTIVITY, MPF_DETECTOR_EVENT_INACTIVITY or
 *         MPF_DETECTOR_EVENT_NOINPUT when the matching timeout completes,
 *         MPF_DETECTOR_EVENT_NONE otherwise
 */
MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{
    mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
    apr_size_t level = 0;
    if ((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
        level = mpf_activity_detector_level_calculate(frame);
        /* Defensive: an error sentinel of (apr_size_t)-1 must not be mistaken for speech. */
        if (level == (apr_size_t)-1) {
            level = 0;
        }
#if 0
        apt_log(APT_LOG_MARK, APT_PRIO_INFO, "Activity Detector --------------------- [%"APR_SIZE_T_FMT"]", level);
#endif
    }

    if (detector->state == DETECTOR_STATE_INACTIVITY) {
        if (level >= 1) {
            /* start to detect activity */
            apt_log(APT_LOG_MARK,APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY_TRANSITION---------------- [%"APR_SIZE_T_FMT"]",level);
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY_TRANSITION);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if (detector->duration >= detector->noinput_timeout) {
                /* noinput timeout elapsed while still silent */
                det_event = MPF_DETECTOR_EVENT_NOINPUT;
            }
        }
    }
    else if (detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
        if (level >= 1) {
            detector->duration += CODEC_FRAME_TIME_BASE;
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY-------11111--------- [%"APR_SIZE_T_FMT"]",level);
            if (detector->duration >= detector->speech_timeout) {
                /* speech lasted long enough: report activity */
                det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY);
            }
        }
        else {
            /* speech did not last: fall back to inactivity */
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY);
        }
    }
    else if (detector->state == DETECTOR_STATE_ACTIVITY) {
        if (level >= 1) {
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_ACTIVITY--------2222-------- [%"APR_SIZE_T_FMT"]",level);
            detector->duration += CODEC_FRAME_TIME_BASE;
        }
        else {
            /* start to detect inactivity */
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_INACTIVITY_TRANSITION---------------- [%"APR_SIZE_T_FMT"]",level);
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY_TRANSITION);
        }
    }
    else if (detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
        if (level >= 1) {
            /* speech resumed: back to activity */
            mpf_activity_detector_state_change(detector, DETECTOR_STATE_ACTIVITY);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            apt_log(APT_LOG_MARK, APT_PRIO_DEBUG, "Activity STATE duration:%"APR_SIZE_T_FMT", silence_timeout:%"APR_SIZE_T_FMT")", detector->duration, detector->silence_timeout);
            if (detector->duration >= detector->silence_timeout) {
                /* silence lasted long enough: report inactivity */
                apt_log(APT_LOG_MARK, APT_PRIO_DEBUG,"Activity Detector ----DETECTOR_STATE_INACTIVITY---------------- [%"APR_SIZE_T_FMT"]",level);
                det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                mpf_activity_detector_state_change(detector, DETECTOR_STATE_INACTIVITY);
            }
        }
    }
    return det_event;
}

我是直接用你的工程测试的,发现一连接 freeswitch,vad 就触发了。你的工程不是已经集成好 webrtc-vad 了吗?里面的代码我有点改不动,哈哈。你测试时没有遇到这个问题吗?

chinarui-na commented 3 months ago

现在安装时提示modules/vad does not exist

ChenchenJT commented 2 months ago

@najsword 老哥,你好,你跑起来了吗,可以交流一下吗

ChenchenJT commented 2 months ago

@xn67744 你好,目前webrtcvad编译失败,想问一下是什么原因