bug: bentoml serve hangs infinitely when importing local modules

SoonbeomChoi commented 1 year ago

Describe the bug

I recently upgraded bentoml from 1.0.10 to the latest version, and `bentoml serve` suddenly stopped working. I found that importing local Python modules that have an `__init__.py` makes `bentoml serve` re-run indefinitely (it is not a reload; it re-runs the whole Python script from start to end). The Python script itself runs without error, but `bentoml serve` hangs when I import such local modules.

The line 'Init: Language setting - universal: true' in the image is just a single print() call in my service.py, but it keeps printing and bentoml never starts up. (Image: Screen Shot 2023-03-23 at 2 33 00 AM)

For example, if I have a 'util' folder laid out as below, either 'import util' or 'from util import a' triggers this problem. Refactoring the identical code so that it works without `__init__.py` solves the problem, but I don't think that is the ideal solution.

```
util
├── __init__.py
├── a.py
└── b.py
```

To reproduce

No response

Expected behavior

No response

Environment

bentoml: 1.0.16 python: 3.9.13

aarnphm commented 1 year ago

Hi there, can you send the output of bentoml serve --production --debug?

sauyon commented 1 year ago

I've attempted to reproduce this locally but it seems to work fine for me, could you share an example bento that doesn't work, and share the full output of bentoml env?

SoonbeomChoi commented 1 year ago

Thanks for your fast response. I'm sorry, I tested my code more thoroughly and found out that importing the local module itself wasn't the problem. Importing wandb causes the infinite loop, and my modules somehow import wandb or part of it.

For now I can solve the problem just by removing `import wandb`, but may I ask whether you have any guess as to why importing wandb causes an infinite loop?

Here are some environment details that might be related to this issue: python: 3.9.13, bentoml: 1.0.16, wandb: 0.13.11, uvicorn: 0.21.1, starlette: 0.25.0

aarnphm commented 1 year ago

Can you send your service definition here?

SoonbeomChoi commented 1 year ago

Here is my service definition; it includes some lines for preprocessing. The `import wandb` statement is inside the `util` and `module` packages.

import math
import io
from typing import List

import torch
import torchaudio
import torch.nn.functional as F
import bentoml
from bentoml.io import NumpyNdarray, Text, File, JSON
from pydantic import BaseModel
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse

import module
import util
from util.config import Config
from util.audio.analyzer import MelSpectrogram
from util.torch import set_device, load_checkpoint
from util.text import english
from util.midi import MIDISegmenter, get_position
from preprocessor.sk_multi_singer import text
from module.dataloader import crossfade

# Build the shared configuration object from a single YAML file, then override
# two of its fields in place. Every function below reads this module-level
# `config`.
config = Config(["config/univ44k_mel96_r4_usv_san_f0denoise10_ref10_l1_step50_ft.yaml"])
config.batch_size = 1
# NOTE(review): the meaning of ref_prob is defined by Config / the model code,
# which is not visible here — presumably the probability of using a reference.
config.ref_prob = 1.0

# Reference audio clips, keyed first by singer and then by style.
# extract_timbre() loads every path listed here and stores one timbre entry
# per (singer, style) pair in timbre_list.
ref_list = {
    'aimymoonA': {
        'Normal': [
            './resource/aimymoon/aimymoon_shootingstar_bridge_ref.wav',
            './resource/aimymoon/aimymoon_rockstar_chorus_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_Verse2_EQ,COMP.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
        ],
        'Uplift': [
            './resource/aimymoon/ShootingStar_80_Fmaj_10_Chorus2.wav',
            './resource/aimymoon/aimymoon_rockstar_chorus_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_Verse2_EQ,COMP.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
        ],
        'Emotion': [
            './resource/aimymoon/aimymoon_shootingstar_bridge_ref.wav',
            './resource/aimymoon/aimymoon_rockstar_chorus_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusB2_EQ,COMP.wav',
        ],
        'Power': [
            './resource/aimymoon/ShootingStar_80_Fmaj_10_Chorus2.wav',
            './resource/aimymoon/aimymoon_rockstar_chorus_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
            './resource/hodong/break_up2.wav',
        ],
    },
    'aimymoonB': {
        'Normal': [
            './resource/aimymoon/aimymoon_shootingstar_bridge_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_Verse2_EQ,COMP.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
        ],
        'Uplift': [
            './resource/aimymoon/ShootingStar_80_Fmaj_10_Chorus2.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_Verse2_EQ,COMP.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
        ],
        'Emotion': [
            './resource/aimymoon/aimymoon_shootingstar_bridge_ref.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusB2_EQ,COMP.wav',
        ],
        'Power': [
            './resource/aimymoon/ShootingStar_80_Fmaj_10_Chorus2.wav',
            './resource/aimymoon/Dope_109_Dbmaj_8_PreChorusB_EQ,COMP.wav',
            './resource/aimymoon/HowDoILook_80_Amaj_8_ChorusA2_EQ,COMP.wav',
            './resource/hodong/break_up2.wav',
        ],
    },
    'hodong': {
        'Normal': [
            './resource/hodong/ro_01071_+0_a_s06_f_02_prechorus.wav',
        ],
        'Calm': [
            './resource/hodong/ro_05477_+0_a_s06_f_02_verse.wav',
        ],
        'Emotion': [
            './resource/hodong/ro_05477_+0_a_s06_f_02_chorus.wav',
        ],
        'Power': [
            './resource/hodong/break_up2.wav',
        ],
    },
    'manA': {
        'Normal': [
            './resource/manA/ro_m20_s02_08304_verse.wav',
            './resource/manA/epic_poem2.wav',
        ],
        'Uplift': [
            './resource/manA/ro_m20_s02_08304_chorus.wav',
            './resource/manA/epic_poem2.wav',
        ],
        'Emotion': [
            './resource/manA/ba_05688_-4_a_s02_m_02_chorus.wav',
            './resource/manA/epic_poem2.wav',
        ],
    },
    'pitbull': {
        'Normal': [
            './resource/pitbull/pitbull001.wav',
            './resource/pitbull/pitbull003.wav',
        ],
    },
    'taeyeon': {
        'Normal': [
            './resource/taeyeon/ty015.wav',
        ],
        'Uplift': [
            './resource/taeyeon/ty047.wav',
        ],
        'Emotion': [
            './resource/taeyeon/ty029.wav',
        ],
    },
}

# Maps each singer name (the keys of ref_list, also sent as `singer` in
# synthesize requests) to the key of the runner in `synthesizer` that serves it.
# Note that 'hodong' is routed to the 'unified' runner, and both aimymoon
# variants share the 'aimymoon' runner.
synthesizer_map = {
    'aimymoonA': 'aimymoon',
    'aimymoonB': 'aimymoon',
    'hodong': 'unified',
    'manA': 'manA',
    'pitbull': 'pitbull',
    'taeyeon': 'taeyeon',
}

# Cache of extracted timbres, indexed as timbre_list[singer][style]. It starts
# empty and is filled by extract_timbre() the first time synthesize() runs.
timbre_list = {}

# Initialize models: look up each stored PyTorch model by tag and wrap it in a
# BentoML runner. `synthesizer` is keyed by the names used in synthesizer_map.
synthesizer = {
    name: bentoml.pytorch.get(tag).to_runner()
    for name, tag in (
        ('unified', 'unified_latest_checkpoint:latest'),
        ('aimymoon', 'aimymoon_latest_checkpoint:latest'),
        ('hodong', 'hodong_checkpoint250:latest'),
        ('manA', 'man_a_checkpoint150:latest'),
        ('pitbull', 'pitbull_checkpoint500:latest'),
        ('taeyeon', 'taeyeon_latest_checkpoint:latest'),
    )
}
# The vocoder runner is kept separate from the per-singer synthesizers.
vocoder = bentoml.pytorch.get('hifigan_ms44k_24k_mel96_3185k:latest').to_runner()

# Create the service and register the runners it uses.
# NOTE(review): synthesizer['hodong'] is not registered here; synthesizer_map
# routes the 'hodong' singer to the 'unified' runner instead — confirm that the
# separate 'hodong' runner is intentionally unused.
svc = bentoml.Service(
    'unified',
    runners=[
        *(
            synthesizer[name]
            for name in ('unified', 'aimymoon', 'manA', 'pitbull', 'taeyeon')
        ),
        vocoder,
    ],
)

# Mount a FastAPI application on the service so plain ASGI routes can be
# declared with @fastapi_app decorators alongside the BentoML APIs.
fastapi_app = FastAPI()
svc.mount_asgi_app(fastapi_app)

# Response schema of the get_syllables API: a single list of syllable strings.
class SyllableData(BaseModel):
    syllables: List[str]

# One note of the score supplied in a synthesize request (see InputData.note).
# NOTE(review): the units of start/end and the pitch encoding are not
# established in this file — presumably seconds and MIDI note numbers; confirm
# against MIDISegmenter.
class NoteData(BaseModel):
    start: float
    pitch: int
    end: float

# Request schema of the synthesize API.
class InputData(BaseModel):
    # Singer name; used as a key into synthesizer_map and timbre_list.
    singer: str
    # Lyrics, passed to text.encode() during preprocessing.
    text: str
    # Notes of the melody, handed to the MIDI segmenter.
    note: List[NoteData]
    # Forwarded to the model as the 'f0_diff' input tensor.
    expression: float
    # Style name; selects the timbre entry timbre_list[singer][style].
    style: str

def swap_dict_list(x):
    """Swap a dict of equal-length lists into a list of dicts.

    ``{'a': [1, 2], 'b': [3, 4]}`` becomes
    ``[{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]``.

    Args:
        x: Mapping whose values are sequences that all have the same length.

    Returns:
        A list with one dict per position, each holding every key of ``x``.
        An empty mapping yields an empty list.

    Raises:
        AssertionError: If the values do not all have the same length.
    """
    # An empty mapping has no first value to measure; there is nothing to swap.
    if not x:
        return []

    length = len(next(iter(x.values())))
    if any(len(values) != length for values in x.values()):
        raise AssertionError('All the values should be the same length of lists.')

    return [{key: values[i] for key, values in x.items()} for i in range(length)]

def preprocess(inputs):
    """Turn a synthesize request into a list of per-segment model inputs.

    The lyrics are encoded, the notes and encoded text are split by a
    MIDISegmenter, position tensors are added where the segmenter did not
    supply them, and the resulting dict of lists is swapped into a list of
    dicts (one per segment), each tagged with a fixed 'id'.

    Args:
        inputs: Request object exposing ``note`` and ``text`` attributes.

    Returns:
        A ``(data, frame_range)`` tuple: the list of segment dicts and the
        second value returned by ``MIDISegmenter.run``.
    """
    raw = {
        'note': inputs.note,
        'text': text.encode(inputs.text, run_g2p=False),
    }

    segmenter = MIDISegmenter(config, 0, config.segment[0])
    data, frame_range = segmenter.run(raw, retain_silence=True)

    # Add source/target position tensors unless the segmenter already did.
    # Each one is derived from the last-dimension size of the first configured
    # input (resp. output) feature of every segment.
    for pos_key, io_key in (('src_pos', 'input'), ('trg_pos', 'output')):
        if pos_key in data:
            continue
        data[pos_key] = []
        for feature in data[config.io[io_key][0]]:
            seq_len = feature.size(-1)
            data[pos_key].append(torch.LongTensor(get_position(seq_len)))

    segments = swap_dict_list(data)
    for segment in segments:
        segment['id'] = 'saebyul'

    return segments, frame_range

def extract_timbre():
    """Fill the module-level ``timbre_list`` from the clips in ``ref_list``.

    For every singer and style, each reference clip is loaded as mono audio at
    ``config.sample_rate``, converted to a mel-spectrogram and truncated to a
    multiple of ``config.downsample_ratio`` frames. The spectrograms and their
    lengths are moved to CUDA and passed to the ``extract_timbre`` method of
    the singer's synthesizer runner; the result is stored at
    ``timbre_list[singer][style]``.
    """
    for singer, styles in ref_list.items():
        timbre_list[singer] = {}
        for style, ref_paths in styles.items():
            mels = []
            mel_lengths = []
            for path in ref_paths:
                audio = util.audio.load(path, config.sample_rate, load_mono=True)
                mel_fn = MelSpectrogram(config)
                # NOTE(review): the 1.1 gain is not explained anywhere in this file.
                mel = 1.1*mel_fn(audio).unsqueeze(0)
                # Largest frame count not exceeding the clip that is divisible
                # by the downsample ratio.
                ref_size = config.downsample_ratio*int(mel.size(-1)//config.downsample_ratio)

                mels.append(mel[..., :ref_size].cuda())
                mel_lengths.append(torch.tensor([ref_size]).cuda())

            runner = synthesizer[synthesizer_map[singer]]
            timbre_list[singer][style] = runner.extract_timbre.run(tuple(mels), mel_lengths)

def run_vocoder(mel):
    """Join mel-spectrogram segments and run them through the vocoder.

    Consecutive segments are blended over ``crossfade_size`` frames: the head
    of each following segment is replaced by a crossfade with the tail of the
    previous one, and that tail is then trimmed off. The segments are
    concatenated along the last dimension, optionally de-normalized, and
    passed to the vocoder runner.

    Args:
        mel: List of mel-spectrogram tensors, one per segment. The list and
            its tensors are modified in place.

    Returns:
        The output of ``vocoder.predict.run`` moved to the CPU.
    """
    crossfade_size = int((config.sample_rate/config.hop_size)*config.segment[0]['crossfade'])
    # Skip blending when the crossfade is zero frames long: `[..., :-0]` is an
    # empty slice, so trimming would discard every segment except the last.
    if crossfade_size > 0:
        for i in range(len(mel) - 1):
            mel[i+1][..., :crossfade_size] = crossfade(
                mel[i][..., -crossfade_size:], mel[i+1][..., :crossfade_size])
            mel[i] = mel[i][..., :-crossfade_size]

    mel = torch.cat(mel, dim=-1)
    if config.norm_fn == 'dBNorm':
        min_level_db = config.norm_params['min_level_db']
        # Map values clamped to [0, 1] linearly onto [min_level_db, 0] ...
        mel = -min_level_db*mel.clamp(0, 1) + min_level_db
        # ... then rescale by ln(10)/20, i.e. dB -> natural-log amplitude.
        mel = math.log(10.0)*mel/20.0
    if mel.ndim < 3:
        # Add a leading dimension when the concatenated tensor has fewer than three.
        mel = mel.unsqueeze(0)

    return vocoder.predict.run(mel).cpu()

syllable_output = JSON(pydantic_model=SyllableData)
@svc.api(input=Text(), output=syllable_output)
def get_syllables(graph: str) -> dict:
    """Split the given text into syllables.

    Args:
        graph: Text received as the plain-text request body.

    Returns:
        A dict with a single 'syllables' key holding the result of
        ``english.get_syllables(graph)``, matching the SyllableData schema.
    """
    return {'syllables': english.get_syllables(graph)}

synthesize_input = JSON(pydantic_model=InputData)
@svc.api(input=synthesize_input, output=File(mime_type='audio/wav'))
async def synthesize(inputs):
    """Synthesize audio for a request and return it as a WAV byte stream.

    The request is preprocessed into segments, each sufficiently long segment
    is run through the singer's synthesizer runner, and the resulting
    mel-spectrograms are crossfaded and vocoded by run_vocoder().

    Args:
        inputs: Parsed InputData request.

    Returns:
        An ``io.BytesIO`` positioned at the start of the encoded WAV data.
    """
    print(inputs.singer)
    data, frame_range = preprocess(inputs)
    dataloader = module.get_dataloader({'test': data}, config)

    # Segments shorter than the crossfade window (in frames) are skipped below.
    min_length = int(config.segment[0]['crossfade']*config.sample_rate/config.hop_size)
    step = config.train_step[0] + 1

    # Timbres are extracted lazily, on the first request only.
    if not timbre_list:
        extract_timbre()

    mel = []
    for batch in dataloader['test']:
        with torch.no_grad():
            if not (batch['trg_len'] >= min_length).item():
                continue

            batch['timbre'] = timbre_list[inputs.singer][inputs.style]
            batch['f0_diff'] = torch.tensor([inputs.expression])
            text_device = batch['text'].device
            if text_device != torch.device('cpu'):
                batch = set_device(batch, [text_device])

            runner = synthesizer[synthesizer_map[inputs.singer]]
            output = await runner.predict.async_run(batch, step)

            mel.append(output['mel'].detach().clone())

    audio = run_vocoder(mel)

    wav_buffer = io.BytesIO()
    torchaudio.save(wav_buffer, audio, config.sample_rate, format='wav')
    # Rewind so the caller reads the file from its first byte.
    wav_buffer.seek(0)

    return wav_buffer

@fastapi_app.get('/robots.txt', response_class=PlainTextResponse)
def robots():
    """Serve a robots.txt that disallows every path for all user agents."""
    return PlainTextResponse(content='User-agent: *\nDisallow: /', status_code=200)

bentoml / BentoML