(Experimental Branch) error: identifier "aten_sigmoid_flat__1" is undefined

RAYTRAC3R commented 4 years ago

For the past week or two, I've been training in Google Colab using the experimental branch, and it's gone well. I do have to make a few changes to the code for it to function in Colab.

However, I tried to do some more training today, and I've run into an error that I can't figure out. It happened when I ran the training script with my own dataset, resuming from the last checkpoint I had.

I'm training at a 44100 sampling rate, with hop size, window size, etc. adjusted accordingly. I had to adjust the n_speakers and decoder_rnn_dim, and turn off the second decoder, so that my old checkpoints would be compatible.

    train(args, args.rank, args.group_name, hparams)
  File "train.py", line 707, in train
    y_pred = force(model, valid_kwargs=model_args, **{**y, "teacher_force_till": teacher_force_till, "p_teacher_forcing": p_teacher_forcing, "drop_frame_rate": drop_frame_rate})
  File "/content/cookietts/CookieTTS/utils/_utils_.py", line 35, in force
    return func(*args, **{k:v for k,v in kwargs.items() if k in valid_kwargs})
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/apex/amp/_initialize.py", line 197, in new_fwd
    **applier(kwargs, input_caster))
  File "/content/cookietts/CookieTTS/_2_ttm/tacotron2_tm/model.py", line 1012, in forward
    return_hidden_state=return_hidden_state)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/content/cookietts/CookieTTS/_2_ttm/tacotron2_tm/model.py", line 835, in forward
    mel_output, gate_output, attention_weights, decoder_hidden_attention_context = self.decode(decoder_input, memory_lengths)
  File "/content/cookietts/CookieTTS/_2_ttm/tacotron2_tm/model.py", line 746, in decode
    decoderrnn_state = self.decoder_rnn(decoder_input, (decoder_hidden, decoder_cell))# lstmcell 12.789ms
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/content/cookietts/CookieTTS/utils/model/layers.py", line 386, in forward
    self.bias_ih, self.bias_hh,
RuntimeError: default_program(57): error: identifier "aten_sigmoid_flat__1" is undefined

default_program(58): error: no operator "=" matches these operands
            operand types are: half = float

default_program(64): error: identifier "aten_mul_flat__1" is undefined

default_program(65): error: no operator "=" matches these operands
            operand types are: half = float

4 errors detected in the compilation of "default_program".

nvrtc compilation failed: 

#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)

template<typename T>
__device__ T maximum(T a, T b) {
  return isnan(a) ? a : (a > b ? a : b);
}

template<typename T>
__device__ T minimum(T a, T b) {
  return isnan(a) ? a : (a < b ? a : b);
}

#define __HALF_TO_US(var) *(reinterpret_cast<unsigned short *>(&(var)))
#define __HALF_TO_CUS(var) *(reinterpret_cast<const unsigned short *>(&(var)))
#if defined(__cplusplus)
  struct __align__(2) __half {
    __host__ __device__ __half() { }

  protected:
    unsigned short __x;
  };

  /* All intrinsic functions are only available to nvcc compilers */
  #if defined(__CUDACC__)
    /* Definitions of intrinsics */
    __device__ __half __float2half(const float f) {
      __half val;
      asm("{  cvt.rn.f16.f32 %0, %1;}\n" : "=h"(__HALF_TO_US(val)) : "f"(f));
      return val;
    }

    __device__ float __half2float(const __half h) {
      float val;
      asm("{  cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(__HALF_TO_CUS(h)));
      return val;
    }

  #endif /* defined(__CUDACC__) */
#endif /* defined(__cplusplus) */
#undef __HALF_TO_US
#undef __HALF_TO_CUS

typedef __half half;

extern "C" __global__
void func_3(half* t0, half* t1, half* aten_mul_flat, half* aten_sigmoid_flat, half* aten_mul_flat_1, half* aten_tanh_flat, half* aten_sigmoid_flat_1, half* prim_constantchunk_flat) {
{
  float v = __half2float(t1[((512 * blockIdx.x + threadIdx.x) % 1280 + 4 * (((512 * blockIdx.x + threadIdx.x) / 1280) * 1280)) + 3840]);
  prim_constantchunk_flat[512 * blockIdx.x + threadIdx.x] = __float2half(v);
  float t1_ = __half2float(t1[((512 * blockIdx.x + threadIdx.x) % 1280 + 4 * (((512 * blockIdx.x + threadIdx.x) / 1280) * 1280)) + 1280]);
  float aten_sigmoid_flat_ = __half2float(aten_sigmoid_flat[512 * blockIdx.x + threadIdx.x]);
  aten_sigmoid_flat__1 = __float2half(1.f / (1.f + (expf(0.f - t1_))));
  aten_sigmoid_flat[512 * blockIdx.x + threadIdx.x] = aten_sigmoid_flat_;
  float t1__1 = __half2float(t1[((512 * blockIdx.x + threadIdx.x) % 1280 + 4 * (((512 * blockIdx.x + threadIdx.x) / 1280) * 1280)) + 2560]);
  aten_tanh_flat[512 * blockIdx.x + threadIdx.x] = __float2half(tanhf(t1__1));
  float t1__2 = __half2float(t1[(512 * blockIdx.x + threadIdx.x) % 1280 + 4 * (((512 * blockIdx.x + threadIdx.x) / 1280) * 1280)]);
  aten_sigmoid_flat_1[512 * blockIdx.x + threadIdx.x] = __float2half(1.f / (1.f + (expf(0.f - t1__2))));
  float aten_mul_flat_ = __half2float(aten_mul_flat[512 * blockIdx.x + threadIdx.x]);
  aten_mul_flat__1 = __float2half((1.f / (1.f + (expf(0.f - t1_)))) * __half2float(t0[512 * blockIdx.x + threadIdx.x]));
  aten_mul_flat[512 * blockIdx.x + threadIdx.x] = aten_mul_flat_;
  aten_mul_flat_1[512 * blockIdx.x + threadIdx.x] = __float2half((1.f / (1.f + (expf(0.f - t1__2)))) * (tanhf(t1__1)));
}
}

Epoch::  46% 456/1000 [00:12<00:14, 37.47epoch/s]     
Iter:  :   0% 0/67 [00:11<?, ?iter/s]
/content/cookietts/CookieTTS/utils/torchmoji/model_def.py:193: UserWarning: This overload of nonzero is deprecated:
    nonzero()
Consider using one of the following signatures instead:
    nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])

CookiePPP commented 4 years ago

Yeah... that's a new one. I can't see enough information here to identify the cause.

RAYTRAC3R commented 4 years ago

I'm still getting this error. I think it's connected to FP16, based on this report of someone getting a similar error: https://github.com/pytorch/pytorch/issues/47138

I tried turning off FP16, and I got a little further, but I ended up bumping into a whole separate error.


  File "train.py", line 933, in <module>
    train(args, args.rank, args.group_name, hparams)
  File "train.py", line 749, in train
    optimizer.step()
  File "/usr/local/lib/python3.6/dist-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/optim/adam.py", line 119, in step
    group['eps']
  File "/usr/local/lib/python3.6/dist-packages/torch/optim/functional.py", line 86, in adam
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
RuntimeError: The size of tensor a (1313) must match the size of tensor b (2) at non-singleton dimension 1
Epoch::  73% 1092/1500 [01:17<00:28, 14.08epoch/s]      
Iter:  :   0% 0/28 [01:17<?, ?iter/s]
/content/cookietts/CookieTTS/utils/torchmoji/model_def.py:193: UserWarning: This overload of nonzero is deprecated:
    nonzero()
Consider using one of the following signatures instead:
    nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
/content/cookietts/CookieTTS/utils/torchmoji/model_def.py:193: UserWarning: This overload of nonzero is deprecated:
    nonzero()
Consider using one of the following signatures instead:
    nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
/content/cookietts/CookieTTS/utils/torchmoji/model_def.py:193: UserWarning: This overload of nonzero is deprecated:
    nonzero()
Consider using one of the following signatures instead:
    nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])

CookiePPP commented 4 years ago

You'll have to use --warm_start_force to deal with the 2nd error. It looks like the optimizer has changed from your checkpoint's version for some reason.

RAYTRAC3R commented 4 years ago

Got some more training done by warm-starting the model. Now, whenever I try to run inference with the resulting model, the ngrok page loads okay, but when I try to generate something it crashes with this:


Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/usr/local/lib/python3.6/dist-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "app.py", line 88, in texttospeech
    tts_outdict = t2s.infer(**tts_dict)
  File "/usr/local/lib/python3.6/dist-packages/torch/autograd/grad_mode.py", line 15, in decorate_context
    return func(*args, **kwargs)
  File "/content/cookietts/CookieTTS/_5_infer/t2s_server/text2speech.py", line 526, in infer
    outputs = self.tacotron.inference(sequence, text_lengths.repeat_interleave(batch_size_per_text, dim=0), tacotron_speaker_ids, style_input)
  File "/content/cookietts/CookieTTS/_2_ttm/tacotron2_tm/model.py", line 1086, in inference
    res_embed, zr, r_mu, r_logvar = self.res_enc(gt_mel, rand_sampling=False)# -> [B, embed]
NameError: name 'gt_mel' is not defined

CookiePPP commented 4 years ago

try again? (new commit should've updated this)

RAYTRAC3R commented 4 years ago

I'm still having the original sigmoid issue, but it's easily fixed by turning off fp16, and every other issue I've mentioned here has been fixed!

CookiePPP commented 3 years ago

Closing issue. The problem has been patched, and tacotron2_tm should be compatible with PyTorch 1.7 and NVIDIA Apex AMP.

CookiePPP / cookietts

(Experimental Branch) error: identifier "aten_sigmoid_flat__1" is undefined #21