salesforce / BLIP

PyTorch code for BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation
BSD 3-Clause "New" or "Revised" License

blip_vqa error #211

Open AIWASS23 opened 4 months ago

AIWASS23 commented 4 months ago

import os
import torch
from models.blip_vqa import blip_vqa  # as in the BLIP repo; load_image, device and config_path are defined elsewhere in my script

def mainVQA():
    image_size = 384  # why 576?

    # Use the current script's directory to build the relative path
    script_dir = os.path.dirname(__file__)  # directory the script is running from
    folder_path = os.path.join(script_dir, '..', '..', 'src', 'img_test')  # relative path

    # Make sure the decoder model is downloaded and ready to use
    vqa_path = './models/vqa.pth'  # see extensions.py

    # Check that the checkpoint exists
    if not os.path.exists(vqa_path):
        print(f"Model not found at {vqa_path}. Please download it first.")
        return

    # List the image files in the folder
    image_files = [f for f in os.listdir(folder_path) if f.endswith('.jpeg') or f.endswith('.jpg')]

    # Load each image and generate an answer
    for file_name in image_files:
        image_path = os.path.join(folder_path, file_name)
        image = load_image(image_path, image_size)

        model = blip_vqa(pretrained=vqa_path, image_size=image_size, vit='base', med_config=config_path)
        model.eval()
        model = model.to(device)

        question = 'Is the child biting nails, using a pacifier or sucking a thumb?'

        with torch.no_grad():
            # Print tensor shapes for debugging
            print(f"Image tensor shape: {image.shape}")

            answer = model(image, question, train=False, inference='generate')
            print(f'Answer for {file_name}: {answer[0]}')
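
Side note on the script itself: the checkpoint is rebuilt and reloaded on every loop iteration above. That is unrelated to the error, but a minimal sketch of constructing the model once before the loop (same assumptions about load_image, device and config_path being defined elsewhere) would be:

    model = blip_vqa(pretrained=vqa_path, image_size=image_size, vit='base', med_config=config_path)
    model.eval()
    model = model.to(device)

    question = 'Is the child biting nails, using a pacifier or sucking a thumb?'
    for file_name in image_files:
        image = load_image(os.path.join(folder_path, file_name), image_size)
        with torch.no_grad():
            answer = model(image, question, train=False, inference='generate')
            print(f'Answer for {file_name}: {answer[0]}')

The full traceback from the model(image, question, ...) call is below.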

    answer = model(image, question, train = False, inference='generate')
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/blip_vqa.py", line 99, in forward
    outputs = self.text_decoder.generate(input_ids=bos_ids,
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py", line 1953, in generate
    result = self._beam_search(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py", line 2914, in _beam_search
    outputs = self(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 886, in forward
    outputs = self.bert(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 781, in forward
    encoder_outputs = self.encoder(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 445, in forward
    layer_outputs = layer_module(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 361, in forward
    cross_attention_outputs = self.crossattention(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 277, in forward
    self_outputs = self.self(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/Users/marcelodearaujo/Desktop/TestBlips/BLIP/models/med.py", line 178, in forward
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
RuntimeError: The size of tensor a (3) must match the size of tensor b (9) at non-singleton dimension 0

ninadchobe commented 1 month ago

Were you able to resolve this issue ?