utterworks / fast-bert

Super easy library for BERT based NLP models

CUBLAS_STATUS_ALLOC_FAILED when calling cublasCreate(handle) #283

Open neel04 opened 3 years ago

neel04 commented 3 years ago

This is the same issue as #179, and the fix suggested by @kinoc there doesn't work for me. The problem shows up at this step: learner.lr_find(start_lr=1e-5, optimizer_type='lamb')
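
For context, the databunch and learner are created along the lines of the fast-bert README example; the paths, model name, and hyperparameters below are placeholders rather than my exact values:

import logging
import torch
from fast_bert.data_cls import BertDataBunch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy

logger = logging.getLogger()
device = torch.device("cuda")

# Placeholder data paths and settings, not my exact config
databunch = BertDataBunch(
    "./data/", "./labels/",
    tokenizer="roberta-base",
    train_file="train.csv",
    val_file="val.csv",
    label_file="labels.csv",
    text_col="text",
    label_col="label",
    batch_size_per_gpu=16,
    max_seq_length=256,
    multi_gpu=False,
    multi_label=False,
    model_type="roberta",
)

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path="roberta-base",
    metrics=[{"name": "accuracy", "function": accuracy}],
    device=device,
    logger=logger,
    output_dir="./output/",
    is_fp16=False,
    multi_gpu=False,
    multi_label=False,
)

# This is the call that fails
learner.lr_find(start_lr=1e-5, optimizer_type="lamb")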

Running this produces the following stack trace:

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-13-fc2a900ad6bc> in <module>()
----> 1 learner.lr_find(start_lr=1e-5,optimizer_type='lamb')

16 frames

/usr/local/lib/python3.6/dist-packages/fast_bert/learner_cls.py in lr_find(self, start_lr, end_lr, use_val_loss, optimizer_type, num_iter, step_mode, smooth_f, diverge_th)
    654         for iteration in tqdm(range(num_iter)):
    655             # train on batch and retrieve loss
--> 656             loss = self._train_batch(train_iter)
    657             if use_val_loss:
    658                 loss = self.validate(quiet=True, loss_only=True)["loss"]

/usr/local/lib/python3.6/dist-packages/fast_bert/learner_cls.py in _train_batch(self, train_iter)
    699                 inputs["token_type_ids"] = batch[2]
    700 
--> 701             outputs = self.model(**inputs)
    702             loss = outputs[
    703                 0

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states)
    342             inputs_embeds=inputs_embeds,
    343             output_attentions=output_attentions,
--> 344             output_hidden_states=output_hidden_states,
    345         )
    346         sequence_output = outputs[0]

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, output_attentions, output_hidden_states)
    760             encoder_attention_mask=encoder_extended_attention_mask,
    761             output_attentions=output_attentions,
--> 762             output_hidden_states=output_hidden_states,
    763         )
    764         sequence_output = encoder_outputs[0]

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_bert.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions, output_hidden_states)
    437                     encoder_hidden_states,
    438                     encoder_attention_mask,
--> 439                     output_attentions,
    440                 )
    441             hidden_states = layer_outputs[0]

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_bert.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions)
    369     ):
    370         self_attention_outputs = self.attention(
--> 371             hidden_states, attention_mask, head_mask, output_attentions=output_attentions,
    372         )
    373         attention_output = self_attention_outputs[0]

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_bert.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions)
    313     ):
    314         self_outputs = self.self(
--> 315             hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions,
    316         )
    317         attention_output = self.output(self_outputs[0], hidden_states)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/transformers/modeling_bert.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions)
    219         output_attentions=False,
    220     ):
--> 221         mixed_query_layer = self.query(hidden_states)
    222 
    223         # If this is instantiated as a cross-attention module, the keys

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`

Can anyone provide some insight into how to solve this error?