arrmansa / Basic-UI-for-GPT-J-6B-with-low-vram

A repository to run GPT-J-6B on low-VRAM machines (4.2 GB minimum VRAM for a 2000-token context, 3.5 GB for a 1000-token context). Model loading takes 12 GB of free RAM.
Apache License 2.0
114 stars 12 forks source link

RuntimeError: where expected condition to be a boolean tensor, but got a tensor with dtype Float #4

Open ebolam opened 3 years ago

ebolam commented 3 years ago

I was successful in getting your code to work on my 2060 laptop after a few tweaks. I just got a Tesla M40 card in and am looking at running GPT-J-6B on it using this method. To start, though, I thought I'd use the same code with the GPT-NEO-2.7B model to verify that it's working OK. However, I got the error in the title when I tried to run it.

Any ideas as to what's going on?

Full error log:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<timed exec> in <module>

~\AppData\Roaming\Python\Python37\site-packages\torch\autograd\grad_mode.py in decorate_context(*args, **kwargs)
     26         def decorate_context(*args, **kwargs):
     27             with self.__class__():
---> 28                 return func(*args, **kwargs)
     29         return cast(F, decorate_context)
     30 

~\AppData\Roaming\Python\Python37\site-packages\transformers\generation_utils.py in generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, encoder_no_repeat_ngram_size, num_return_sequences, max_time, max_new_tokens, decoder_start_token_id, use_cache, num_beam_groups, diversity_penalty, prefix_allowed_tokens_fn, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, remove_invalid_values, synced_gpus, **model_kwargs)
   1024                 return_dict_in_generate=return_dict_in_generate,
   1025                 synced_gpus=synced_gpus,
-> 1026                 **model_kwargs,
   1027             )
   1028 

~\AppData\Roaming\Python\Python37\site-packages\transformers\generation_utils.py in sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, **model_kwargs)
   1533                 return_dict=True,
   1534                 output_attentions=output_attentions,
-> 1535                 output_hidden_states=output_hidden_states,
   1536             )
   1537 

~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Roaming\Python\Python37\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py in forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    983             output_attentions=output_attentions,
    984             output_hidden_states=output_hidden_states,
--> 985             return_dict=return_dict,
    986         )
    987         hidden_states = transformer_outputs[0]

~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Temp/ipykernel_8288/2499053029.py in new_forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
    219                     head_mask=head_mask[i],
    220                     use_cache=use_cache,
--> 221                     output_attentions=output_attentions,
    222                 )
    223 

~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Roaming\Python\Python37\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py in forward(self, hidden_states, layer_past, attention_mask, head_mask, use_cache, output_attentions)
    559             head_mask=head_mask,
    560             use_cache=use_cache,
--> 561             output_attentions=output_attentions,
    562         )
    563         attn_output = attn_outputs[0]  # output_attn: a, present, (attentions)

~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Roaming\Python\Python37\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py in forward(self, hidden_states, layer_past, attention_mask, head_mask, use_cache, output_attentions)
    501             head_mask=head_mask,
    502             use_cache=use_cache,
--> 503             output_attentions=output_attentions,
    504         )
    505 

~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Roaming\Python\Python37\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py in forward(self, hidden_states, attention_mask, layer_past, head_mask, use_cache, output_attentions)
    453             masked_bias=self.masked_bias,
    454             attn_dropout=self.attn_dropout,
--> 455             head_mask=head_mask,
    456         )
    457 

~\AppData\Roaming\Python\Python37\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py in _attn(self, query, key, value, causal_mask, masked_bias, attn_dropout, attention_mask, head_mask)
    276 
    277         attn_weights = torch.matmul(query, key.transpose(-1, -2))
--> 278         attn_weights = torch.where(causal_mask, attn_weights, masked_bias.to(attn_weights.dtype))
    279 
    280         if attention_mask is not None:

RuntimeError: where expected condition to be a boolean tensor, but got a tensor with dtype Float