Traceback (most recent call last):
File "/home/LLM/videoxl/videoxl/infer.py", line 17, in <module>
tokenizer, model, image_processor, _ = load_pretrained_model(model_path, None, "llava_qwen", device_map="cuda:0")
File "/home/LLM/videoxl/videoxl/videoxl/model/builder.py", line 215, in load_pretrained_model
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1498, in from_pretrained
model, loading_info = super().from_pretrained(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3404, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1466, in __init__
self.model = LlavaQwenModel(config)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1454, in __init__
super(LlavaQwenModel, self).__init__(config)
File "/home/LLM/videoxl/videoxl/videoxl/model/llava_arch.py", line 40, in __init__
self.vision_tower = build_vision_tower(config, delay_load=delay_load)
File "/home/LLM/videoxl/videoxl/videoxl/model/multimodal_encoder/builder.py", line 23, in build_vision_tower
raise ValueError(f"Unknown vision tower: {vision_tower}")
Traceback (most recent call last):
File "/home/LLM/videoxl/videoxl/infer.py", line 17, in <module>
tokenizer, model, image_processor, _ = load_pretrained_model(model_path, None, "llava_qwen", device_map="cuda:0")
File "/home/LLM/videoxl/videoxl/videoxl/model/builder.py", line 215, in load_pretrained_model
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1498, in from_pretrained
model, loading_info = super().from_pretrained(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3404, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1466, in __init__
self.model = LlavaQwenModel(config)
File "/home/LLM/videoxl/videoxl/videoxl/model/language_model/llava_qwen.py", line 1454, in __init__
super(LlavaQwenModel, self).__init__(config)
File "/home/LLM/videoxl/videoxl/videoxl/model/llava_arch.py", line 40, in __init__
self.vision_tower = build_vision_tower(config, delay_load=delay_load)
File "/home/LLM/videoxl/videoxl/videoxl/model/multimodal_encoder/builder.py", line 23, in build_vision_tower
raise ValueError(f"Unknown vision tower: {vision_tower}")