modelscope / FunASR

A Fundamental End-to-End Speech Recognition Toolkit and Open Source SOTA Pretrained Models, Supporting Speech Recognition, Voice Activity Detection, Text Post-processing etc.
https://www.funasr.com

Online model response time degrades after running for a long time #1170

Open · yijinsheng opened this issue 7 months ago

yijinsheng commented 7 months ago
  1. Environment: OS: Linux; Python: 3.8; torch: 2.0.0; modelscope: 1.9.3; GPU: P100; GPU driver: 535; CUDA: 11.7
  2. Code:
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
import tracemalloc
import logging

tracemalloc.start()

logger = get_logger(log_level=logging.CRITICAL)
logger.setLevel(logging.CRITICAL)

# runtime configuration
n_gpu = 1
n_cpu = 4
wav_file = "football.wav"

asr_model_online = 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online'

inference_pipeline_asr_online = pipeline(
    task=Tasks.auto_speech_recognition,
    model=asr_model_online,
    ngpu=n_gpu,
    ncpu=n_cpu,
    model_revision='v1.0.7',
    update_model='v1.0.7',
    num_cache_chunks=10,
    mode='paraformer_streaming')

param_dict_asr_online = {"cache": dict(), "chunk_size": [5, 5, 5]}
with open(wav_file, mode='rb') as f:
    wav_data = f.read()  # raw bytes (including the WAV header), fed to the pipeline in fixed-size slices below
from tqdm import tqdm
step = 1920 * 5  # 5 x 60 ms frames of 16 kHz 16-bit PCM (1920 bytes per frame), i.e. 300 ms per call
for i in tqdm(range(0, len(wav_data), step)):
    audio_in = wav_data[i:i + step]
    rec_result = inference_pipeline_asr_online(audio_in=audio_in,
                                               param_dict=param_dict_asr_online)
    print(rec_result)

The test audio is a three-hour football commentary video.

  3. Problem: during testing, the response time grows noticeably as the run goes on; after about half an hour it rises from roughly 50 ms to 400 ms.
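Since tracemalloc is started in the snippet above but never inspected, here is a small diagnostic sketch (my addition, not part of the original report) showing how snapshots could be compared periodically to check whether any Python-side structure, such as the streaming cache dict, keeps growing:

import tracemalloc

# Make sure tracing is active (it already is if tracemalloc.start() ran above)
if not tracemalloc.is_tracing():
    tracemalloc.start()

baseline = tracemalloc.take_snapshot()

def report_growth(top_n=10):
    # Compare current Python allocations against the baseline and print the
    # allocation sites whose memory grew the most
    snapshot = tracemalloc.take_snapshot()
    for stat in snapshot.compare_to(baseline, "lineno")[:top_n]:
        print(stat)

For example, calling report_growth() every few hundred chunks inside the loop would show which allocations accumulate. This only covers the Python heap; GPU memory growth would have to be tracked separately.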
yijinsheng commented 7 months ago

I ran the model over the full three hours of audio, following the code here, and printed the number of calls and the time per call. The complete code is below:

import os
import logging
import torch
import soundfile

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger

logger = get_logger(log_level=logging.CRITICAL)
logger.setLevel(logging.CRITICAL)

os.environ["MODELSCOPE_CACHE"] = "./"
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.7',
    update_model=False,
    mode="paraformer_streaming"
)

model_dir = os.path.join(os.environ["MODELSCOPE_CACHE"],
                         "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online")
speech, sample_rate = soundfile.read("basketball.wav")
speech_length = speech.shape[0]

sample_offset = 0
chunk_size = [0, 10, 5]  # [0, 10, 5] 600ms, [0, 8, 4] 480ms
encoder_chunk_look_back = 4  # number of chunks to lookback for encoder self-attention
decoder_chunk_look_back = 1  # number of encoder chunks to lookback for decoder cross-attention
stride_size = chunk_size[1] * 960
param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
              "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
final_result = ""
import time

for sample_offset in range(0, speech_length, min(stride_size, speech_length - sample_offset)):
    if sample_offset + stride_size >= speech_length - 1:
        stride_size = speech_length - sample_offset
        param_dict["is_final"] = True
    start = time.time()
    rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + stride_size],
                                    param_dict=param_dict)
    end = time.time()
    cost = end - start

    if len(rec_result) != 0:
        final_result += rec_result['text']
        print(rec_result['text'], f"call count: {sample_offset // stride_size}, elapsed: {cost}", sep='\t')
print(final_result)

The per-call latency is shown in the figure below, rising from about 60 ms at the start to roughly 200 ms at the end (full log attached as asr_online.log). In addition, GPU memory seems to keep growing over the run: it starts at around 3 GB and is nearly full by the end, while CPU usage climbs from about 600% to 900%.

[figure: asr_online, per-call latency over time]

[image attachment: WechatIMG567]
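For what it's worth, here is a minimal monitoring sketch (not from the original report) that could be dropped into the loop above to log per-chunk latency together with GPU memory as seen by the PyTorch allocator. It assumes the model runs on CUDA device 0 and only tracks tensors managed by torch, so the numbers will be lower than what nvidia-smi reports:

import time
import torch

def log_step(step_idx, start_time):
    # Wall-clock latency of this chunk, in milliseconds
    elapsed_ms = (time.time() - start_time) * 1000.0
    # Memory currently held by tensors on GPU 0 vs. memory reserved by the caching allocator
    allocated_mb = torch.cuda.memory_allocated(0) / 1024 ** 2
    reserved_mb = torch.cuda.memory_reserved(0) / 1024 ** 2
    print(f"step {step_idx}: {elapsed_ms:.1f} ms, "
          f"allocated {allocated_mb:.0f} MB, reserved {reserved_mb:.0f} MB")

For example, calling log_step(sample_offset // stride_size, start) right after each inference call would show whether allocated memory climbs along with the latency.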
wwfcnu commented 2 months ago

(quoting yijinsheng's comment above in full)

I've run into the same problem. Have you solved it?

yijinsheng commented 2 months ago

I joined the official community group. According to the author, this problem should be gone after upgrading to the 2.0 code. I'm not going to dig into this any further, but you can give it a try @wwfcnu
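For reference, a minimal sketch of the same streaming loop on the newer funasr AutoModel interface, adapted from the streaming example in the FunASR README; the model name "paraformer-zh-streaming" and the generate() parameters are taken from that example and may differ between versions:

from funasr import AutoModel
import soundfile

model = AutoModel(model="paraformer-zh-streaming")

chunk_size = [0, 10, 5]          # [0, 10, 5] corresponds to 600 ms chunks
encoder_chunk_look_back = 4      # encoder self-attention lookback, in chunks
decoder_chunk_look_back = 1      # decoder cross-attention lookback, in encoder chunks

speech, sample_rate = soundfile.read("basketball.wav")
chunk_stride = chunk_size[1] * 960   # 600 ms of 16 kHz audio per step

cache = {}                           # streaming state, passed back in on every call
total_chunk_num = (len(speech) + chunk_stride - 1) // chunk_stride
for i in range(total_chunk_num):
    speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
    is_final = i == total_chunk_num - 1
    res = model.generate(input=speech_chunk, cache=cache, is_final=is_final,
                         chunk_size=chunk_size,
                         encoder_chunk_look_back=encoder_chunk_look_back,
                         decoder_chunk_look_back=decoder_chunk_look_back)
    print(res)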