I had the same error. It looks like a transformers version issue: the max_batch_size name used by HybridCache has apparently been deprecated in favor of batch_size. I edited modeling_ovis.py in the Ovis model directory and replaced the two occurrences of max_batch_size with batch_size inside the _get_hybrid_cache_for_llm function:
def _get_hybrid_cache_for_llm(self, max_batch_size: int, max_cache_len: int):
    cache_cls = HybridCache
    llm = self.get_llm()

    need_new_cache = (
        not hasattr(llm, "_cache")
        or (not isinstance(llm._cache, cache_cls))
        # or llm._cache.max_batch_size != max_batch_size
        or llm._cache.batch_size != max_batch_size
        or llm._cache.max_cache_len < max_cache_len
    )

    if need_new_cache:
        if hasattr(llm.config, "_pre_quantization_dtype"):
            cache_dtype = llm.config._pre_quantization_dtype
        else:
            cache_dtype = llm.dtype
        llm._cache = cache_cls(
            config=llm.config,
            # max_batch_size=max_batch_size,
            batch_size=max_batch_size,
            max_cache_len=max_cache_len,
            device=llm.device,
            dtype=cache_dtype,
        )
    else:
        llm._cache.reset()
    return llm._cache
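For what it's worth, if you want a single patch that works no matter which name your installed transformers release uses, here is a rough sketch of my own (not from the Ovis repo) that detects whether HybridCache expects max_batch_size or batch_size and passes whichever one exists. It assumes the constructor still takes config, max_cache_len, device, and dtype exactly as in the function above:

# Version-tolerant sketch (my own adaptation, not part of the Ovis repo):
# detect which batch-size name this transformers release uses and pass that one.
import inspect

from transformers import HybridCache


def _get_hybrid_cache_for_llm(self, max_batch_size: int, max_cache_len: int):
    cache_cls = HybridCache
    llm = self.get_llm()

    # Different transformers releases disagree on the keyword name.
    init_params = inspect.signature(cache_cls.__init__).parameters
    batch_kw = "max_batch_size" if "max_batch_size" in init_params else "batch_size"

    # Read the batch size of an existing cache under either attribute name.
    old_cache = getattr(llm, "_cache", None)
    old_batch_size = getattr(old_cache, "max_batch_size", None)
    if old_batch_size is None:
        old_batch_size = getattr(old_cache, "batch_size", None)

    need_new_cache = (
        old_cache is None
        or not isinstance(old_cache, cache_cls)
        or old_batch_size != max_batch_size
        or old_cache.max_cache_len < max_cache_len
    )

    if need_new_cache:
        if hasattr(llm.config, "_pre_quantization_dtype"):
            cache_dtype = llm.config._pre_quantization_dtype
        else:
            cache_dtype = llm.dtype
        llm._cache = cache_cls(
            config=llm.config,
            max_cache_len=max_cache_len,
            device=llm.device,
            dtype=cache_dtype,
            **{batch_kw: max_batch_size},
        )
    else:
        llm._cache.reset()
    return llm._cache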
The patched model runs, even across repeated inference calls, but I haven't verified that the cache still behaves exactly as intended.
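If someone wants to sanity-check the cache reuse, a quick, unverified check would be to call generate twice and compare the cache object and the greedy outputs. This sketch assumes model, input_ids, pixel_values, attention_mask, and gen_kwargs are already prepared as in the Ovis model-card example, with do_sample=False:

# Rough sanity check (assumes model/input_ids/pixel_values/attention_mask/gen_kwargs
# are set up as in the Ovis model-card example, with greedy decoding).
import torch

with torch.inference_mode():
    out1 = model.generate(input_ids, pixel_values=pixel_values,
                          attention_mask=attention_mask, **gen_kwargs)[0]
    first_cache = model.get_llm()._cache

    out2 = model.generate(input_ids, pixel_values=pixel_values,
                          attention_mask=attention_mask, **gen_kwargs)[0]
    second_cache = model.get_llm()._cache

# With identical inputs and max_new_tokens, the patched code should reset and
# reuse the same HybridCache, and greedy decoding should reproduce the same tokens.
print("cache reused:", first_cache is second_cache)
print("outputs identical:", out1.shape == out2.shape and torch.equal(out1, out2))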
Thanks! This works for me.
The ovis1.6 package has a problem.
Hello, thanks for this great work! When I run inference with the model, only the first call succeeds; any subsequent call fails with the error below. Is there a fix for this? Thanks!
AttributeError                            Traceback (most recent call last)
Cell In[6], line 31
     19 with torch.inference_mode():
     20     gen_kwargs = dict(
     21         max_new_tokens=1024,
     22         do_sample=False,
   (...)
     29         use_cache=True
     30     )
---> 31     output_ids = model.generate(input_ids, pixel_values=pixel_values, attention_mask=attention_mask, **gen_kwargs)[0]
     32     output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
     33     print(f'Output:\n{output}')

File ~\.cache\huggingface\modules\transformers_modules\Ovis1.6-Gemma2-9B\modeling_ovis.py:596, in Ovis.generate(self, inputs, **kwargs)
    588 _, inputs_embeds, labels, attention_mask = self.merge_multimodal(
    589     text_input_ids=inputs,
    590     text_attention_masks=kwargs.pop('attention_mask'),
   (...)
    593     left_padding=True
    594 )
    595 if getattr(self.generation_config, 'cache_implementation') == 'hybrid':  # mainly for Gemma2
--> 596     kwargs['past_key_values'] = self._get_hybrid_cache_for_llm(
    597         getattr(kwargs, "num_beams", inputs_embeds.shape[0]), kwargs['max_new_tokens'] + inputs_embeds.shape[-2])
    598     self.get_llm()._supports_cache_class = True
    599     kwargs['cache_implementation'] = None

File ~\.cache\huggingface\modules\transformers_modules\Ovis1.6-Gemma2-9B\modeling_ovis.py:562, in Ovis._get_hybrid_cache_for_llm(self, max_batch_size, max_cache_len)
    556 cache_cls = HybridCache
    557 llm = self.get_llm()
    559 need_new_cache = (
    560     not hasattr(llm, "_cache")
    561     or (not isinstance(llm._cache, cache_cls))
--> 562     or llm._cache.max_batch_size != max_batch_size
    563     or llm._cache.max_cache_len < max_cache_len
    564 )
    566 if need_new_cache:
    567     if hasattr(llm.config, "_pre_quantization_dtype"):

File ~\anaconda3\envs\robustbench\Lib\site-packages\torch\nn\modules\module.py:1729, in Module.__getattr__(self, name)
   1727 if name in modules:
   1728     return modules[name]
-> 1729 raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

AttributeError: 'HybridCache' object has no attribute 'max_batch_size'