[2024-03-20 16:15:45,873] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.76k/4.76k [00:00<00:00, 11.9MB/s]
Traceback (most recent call last):
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 125, in <module>
eval_model(args)
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 32, in eval_model
tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name)
File "/workspace/GeoChat/geochat/model/builder.py", line 124, in load_pretrained_model
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
return model_class.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 46, in __init__
self.model = GeoChatLlamaModel(config)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 38, in __init__
super(GeoChatLlamaModel, self).__init__(config)
File "/workspace/GeoChat/geochat/model/geochat_arch.py", line 33, in __init__
self.vision_tower = build_vision_tower(config, delay_load=True)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/builder.py", line 9, in build_vision_tower
return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 88, in __init__
self.clip_interpolate_embeddings(image_size=504, patch_size=14)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 25, in clip_interpolate_embeddings
state_dict = self.vision_tower.vision_model.embeddings.position_embedding.state_dict()
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1614, in __getattr__
raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'CLIPVisionTower' object has no attribute 'vision_tower'. Did you mean: 'vision_tower_name'?
root@dfbbfcc8de85:/workspace/GeoChat# sh /workspace/GeoChat/scripts/LR.sh
[2024-03-20 16:25:28,364] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 125, in <module>
eval_model(args)
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 32, in eval_model
tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name)
File "/workspace/GeoChat/geochat/model/builder.py", line 124, in load_pretrained_model
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
return model_class.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 46, in __init__
self.model = GeoChatLlamaModel(config)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 38, in __init__
super(GeoChatLlamaModel, self).__init__(config)
File "/workspace/GeoChat/geochat/model/geochat_arch.py", line 33, in __init__
self.vision_tower = build_vision_tower(config, delay_load=True)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/builder.py", line 9, in build_vision_tower
return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 88, in __init__
self.clip_interpolate_embeddings(image_size=504, patch_size=14)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 25, in clip_interpolate_embeddings
state_dict = self.vision_tower_name.vision_model.embeddings.position_embedding.state_dict()
AttributeError: 'str' object has no attribute 'vision_model'
root@dfbbfcc8de85:/workspace/GeoChat# sh /workspace/GeoChat/scripts/LR.sh
[2024-03-20 16:26:28,295] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 125, in <module>
eval_model(args)
File "/workspace/GeoChat/geochat/eval/batch_geochat_vqa.py", line 32, in eval_model
tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name)
File "/workspace/GeoChat/geochat/model/builder.py", line 124, in load_pretrained_model
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
return model_class.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 46, in __init__
self.model = GeoChatLlamaModel(config)
File "/workspace/GeoChat/geochat/model/language_model/geochat_llama.py", line 38, in __init__
super(GeoChatLlamaModel, self).__init__(config)
File "/workspace/GeoChat/geochat/model/geochat_arch.py", line 33, in __init__
self.vision_tower = build_vision_tower(config, delay_load=True)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/builder.py", line 9, in build_vision_tower
return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 88, in __init__
self.clip_interpolate_embeddings(image_size=504, patch_size=14)
File "/workspace/GeoChat/geochat/model/multimodal_encoder/clip_encoder.py", line 25, in clip_interpolate_embeddings
state_dict = self.vision_tower.vision_model.embeddings.position_embedding.state_dict()
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1614, in __getattr__
raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'CLIPVisionTower' object has no attribute 'vision_tower'. Did you mean: 'vision_tower_name'?
The code: