When running the BLIP Analyze Node, I get the following errors:
Error occurred when executing BLIP Analyze Image:
The size of tensor a (3) must match the size of tensor b (9) at non-singleton dimension 0
File "C:\AI-Generation\ComfyUI\execution.py", line 144, in recursive_execute
output_data, output_ui = get_output_data(obj, input_data_all)
File "C:\AI-Generation\ComfyUI\execution.py", line 74, in get_output_data
return_values = map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True)
File "C:\AI-Generation\ComfyUI\execution.py", line 67, in map_node_over_list
results.append(getattr(obj, func)(slice_dict(input_data_all, i)))
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\WAS_Node_Suite.py", line 10227, in blip_caption_image
answer = model(tensor, question, train=False, inference='generate')
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\blip_vqa.py", line 103, in forward
outputs = self.text_decoder.generate(input_ids=bos_ids,
File "C:\Python310\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "C:\Users\Anwender\AppData\Roaming\Python\Python310\site-packages\transformers\generation\utils.py", line 1611, in generate
return self.beam_search(
File "C:\Users\Anwender\AppData\Roaming\Python\Python310\site-packages\transformers\generation\utils.py", line 2909, in beam_search
outputs = self(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 886, in forward
outputs = self.bert(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 781, in forward
encoder_outputs = self.encoder(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 445, in forward
layer_outputs = layer_module(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 361, in forward
cross_attention_outputs = self.crossattention(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 277, in forward
self_outputs = self.self(
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 178, in forward
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
This happens for both the annotate and the interrogate model/mode; only the reported tensor sizes differ between the two cases.
Either I am doing something wrong with the input image (I just load an image with the standard image load node as the input), or there is a bug in the node. Please have a look at this; it would really help me out.
You likely have the wrong transformers version installed — perhaps it was updated by another custom node — or you updated ComfyUI without re-running the WAS-NS requirements.
When running the BLIP Analyze Node, I get the following errors:
Error occurred when executing BLIP Analyze Image:
The size of tensor a (3) must match the size of tensor b (9) at non-singleton dimension 0
File "C:\AI-Generation\ComfyUI\execution.py", line 144, in recursive_execute output_data, output_ui = get_output_data(obj, input_data_all) File "C:\AI-Generation\ComfyUI\execution.py", line 74, in get_output_data return_values = map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True) File "C:\AI-Generation\ComfyUI\execution.py", line 67, in map_node_over_list results.append(getattr(obj, func)(slice_dict(input_data_all, i))) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\WAS_Node_Suite.py", line 10227, in blip_caption_image answer = model(tensor, question, train=False, inference='generate') File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\blip_vqa.py", line 103, in forward outputs = self.text_decoder.generate(input_ids=bos_ids, File "C:\Python310\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "C:\Users\Anwender\AppData\Roaming\Python\Python310\site-packages\transformers\generation\utils.py", line 1611, in generate return self.beam_search( File "C:\Users\Anwender\AppData\Roaming\Python\Python310\site-packages\transformers\generation\utils.py", line 2909, in beam_search outputs = self( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 886, in forward outputs = self.bert( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 781, in forward encoder_outputs = self.encoder( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return
forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 445, in forward layer_outputs = layer_module( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 361, in forward cross_attention_outputs = self.crossattention( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 277, in forward self_outputs = self.self( File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "C:\AI-Generation\ComfyUI\custom_nodes\was-node-suite-comfyui\repos\BLIP\models\med.py", line 178, in forward attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
This happens for both the annotate and the interrogate model/mode; only the reported tensor sizes differ between the two cases. Either I am doing something wrong with the input image (I just load an image with the standard image load node as the input), or there is a bug in the node. Please have a look at this; it would really help me out.