TensorSpeech / TensorFlowTTS

:stuck_out_tongue_closed_eyes: TensorFlowTTS: Real-Time State-of-the-art Speech Synthesis for Tensorflow 2 (supported including English, French, Korean, Chinese, German and Easy to adapt for other languages)
https://tensorspeech.github.io/TensorFlowTTS/
Apache License 2.0
3.83k stars 813 forks source link

ValueError: Inconsistent shapes: saw (None,) but expected (None, 1) #677

Closed Greeksilverfir closed 2 years ago

dathudeptrai commented 3 years ago

@Greeksilverfir can you share with me where the bug comes from?

stale[bot] commented 2 years ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs.

RiuHDuo commented 2 years ago

@Greeksilverfir can you share with me where the bug comes from?

I met the same error when I converted an h5 checkpoint to TFLite:

  File "c.py", line 50, in <module>
    main()
  File "c.py", line 37, in main
    tacotron2_convert = TFAutoModel.from_pretrained(
  File "/Volumes/SanDisk/TensorFlowTTS 2/tensorflow_tts/inference/auto_model.py", line 104, in from_pretrained
    model._build()
  File "/Volumes/SanDisk/TensorFlowTTS 2/tensorflow_tts/inference/savable_models.py", line 43, in _build
    self([input_ids, input_lengths, speaker_ids])
  File "/Users/riuhduo/opt/miniconda3/envs/py38/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Volumes/SanDisk/TensorFlowTTS 2/tensorflow_tts/inference/savable_models.py", line 37, in call
    return super().inference(input_ids, input_lengths, speaker_ids)
  File "/var/folders/p3/c8f_zcd52tx_qt0_ydc6ft1m0000gn/T/__autograph_generated_filebpd4rpdw.py", line 32, in tf__inference
    ((frames_prediction, stop_token_prediction, _), final_decoder_state, _) = ag__.converted_call(ag__.ld(dynamic_decode), (ag__.ld(self).decoder,), dict(maximum_iterations=ag__.ld(self).maximum_iterations, training=False), fscope)
  File "/var/folders/p3/c8f_zcd52tx_qt0_ydc6ft1m0000gn/T/__autograph_generated_filekxwlgqoz.py", line 408, in tf__dynamic_decode
    res = ag__.converted_call(ag__.ld(tf).while_loop, (ag__.ld(condition), ag__.ld(body)), dict(loop_vars=(ag__.ld(initial_time), ag__.ld(initial_outputs_ta), ag__.ld(initial_state), ag__.ld(initial_inputs), ag__.ld(initial_finished), ag__.ld(initial_sequence_lengths)), parallel_iterations=ag__.ld(parallel_iterations), maximum_iterations=ag__.ld(maximum_iterations), swap_memory=ag__.ld(swap_memory)), fscope)
  File "/var/folders/p3/c8f_zcd52tx_qt0_ydc6ft1m0000gn/T/__autograph_generated_filekxwlgqoz.py", line 400, in body
    outputs_ta = ag__.converted_call(ag__.ld(tf).nest.map_structure, (ag__.autograph_artifact((lambda ta, out: ag__.converted_call(ag__.ld(ta).write, (ag__.ld(time), ag__.ld(out)), None, fscope_4))), ag__.ld(outputs_ta), ag__.ld(emit)), None, fscope_4)
  File "/var/folders/p3/c8f_zcd52tx_qt0_ydc6ft1m0000gn/T/__autograph_generated_filekxwlgqoz.py", line 400, in <lambda>
    outputs_ta = ag__.converted_call(ag__.ld(tf).nest.map_structure, (ag__.autograph_artifact((lambda ta, out: ag__.converted_call(ag__.ld(ta).write, (ag__.ld(time), ag__.ld(out)), None, fscope_4))), ag__.ld(outputs_ta), ag__.ld(emit)), None, fscope_4)
ValueError: Exception encountered when calling layer "tacotron2" (type SavableTFTacotron2).

in user code:

    File "/Volumes/SanDisk/TensorFlowTTS 2/tensorflow_tts/models/tacotron2.py", line 932, in inference  *
        (
    File "/Volumes/SanDisk/TensorFlowTTS 2/tensorflow_tts/utils/decoder.py", line 256, in body  *
        outputs_ta = tf.nest.map_structure(

    ValueError: Inconsistent shapes: saw (None,) but expected (None, 1) 

Call arguments received by layer "tacotron2" (type SavableTFTacotron2):
  • inputs=['tf.Tensor(shape=(1, 9), dtype=int32)', 'tf.Tensor(shape=(1,), dtype=int32)', 'tf.Tensor(shape=(1,), dtype=int32)']
  • training=False

c.py

import tensorflow as tf
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

def convert_to_tflite(model, name="model", quantize=True):
  """Convert a TensorFlowTTS model to a TFLite flatbuffer and save it to disk.

  Args:
    model: a model exposing an ``inference_tflite`` tf.function
      (e.g. a model loaded with ``enable_tflite_convertible=True``).
    name: base name for the saved ``.tflite`` file.
    quantize: when True, apply dynamic-range quantization
      (``tf.lite.Optimize.DEFAULT``); 8-bit inference on desktop CPUs may
      be slower, but the file is much smaller.

  Returns:
    The path of the saved ``.tflite`` file.
  """
  # Trace the TFLite-friendly inference graph once.
  concrete_function = model.inference_tflite.get_concrete_function()
  converter = tf.lite.TFLiteConverter.from_concrete_functions(
      [concrete_function]
  )
  # The dynamic decode loop uses TF ops with no builtin TFLite kernel,
  # so SELECT_TF_OPS is required in addition to the builtins.
  converter.target_spec.supported_ops = [
      tf.lite.OpsSet.TFLITE_BUILTINS,
      tf.lite.OpsSet.SELECT_TF_OPS,
  ]
  # BUG FIX: the original code set Optimize.DEFAULT unconditionally, so
  # quantize=False still produced a quantized model (the float32
  # supported_types setting does not disable quantization). Only enable
  # optimization when quantization is requested.
  if quantize:
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
  # NOTE: do not set supported_types = [tf.float16]; it is known to break
  # this model, see
  # https://github.com/TensorSpeech/TensorFlowTTS/issues/346#issuecomment-728656417

  tflite_model = converter.convert()

  saved_path = name + '_quan.tflite' if quantize else name + '.tflite'

  # Save the TF Lite model.
  with open(saved_path, 'wb') as f:
    f.write(tflite_model)

  print('Model: %s size is %f MBs.' % (name, len(tflite_model) / 1024 / 1024.0) )

  return saved_path

def main():
    """Load a pretrained Tacotron2 checkpoint and export it to TFLite."""
    config = AutoConfig.from_pretrained(
        './examples/tacotron2/conf/tacotron2.baker.v1.yaml'
    )
    # Load with enable_tflite_convertible so the model exposes a
    # tflite-traceable inference function.
    model = TFAutoModel.from_pretrained(
        config=config,
        pretrained_path="./examples/tacotron2/exp/train.tacotron2.baker.v1/checkpoints/model-200000.h5",
        name="tacotron2",
        enable_tflite_convertible=True,
    )
    model.setup_window(win_front=6, win_back=6)
    model.setup_maximum_iterations(1000)  # be careful
    tflite_path = convert_to_tflite(model, "tacotron2")
    print(tflite_path)

if __name__ == "__main__":
    main()