Closed eaidova closed 3 weeks ago
Added support for the nanoLLaVA model family (qnguyen3/nanoLLaVA).
Example of usage:
# Example: running nanoLLaVA through OVModelForVisualCausalLM.
# Fixes vs. the original snippet: `transformers` (not `transformer`) is the
# correct package name, `trust_remote_code` was misspelled, and the
# `torch` / `requests` imports used below were missing.
import requests
import torch
from PIL import Image
from optimum.intel.openvino import OVModelForVisualCausalLM
from transformers import AutoTokenizer, AutoProcessor, TextStreamer

model_id = "qnguyen3/nanoLLaVA"
model = OVModelForVisualCausalLM.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# The image processor lives with the vision tower, not the main checkpoint.
processor = AutoProcessor.from_pretrained(model.config.mm_vision_tower)

prompt = "Describe this image in detail"


def process_text_input(text, tokenizer):
    """Tokenize a prompt containing one ``<image>`` placeholder.

    Splits the text around the placeholder, re-joins the two tokenized
    chunks with the image token id (-200) in between, and returns
    ``(input_ids, attention_mask)`` as batch-of-1 tensors.
    """
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("<image>")]
    # -200 is the sentinel id nanoLLaVA uses to mark where image features go.
    input_ids = torch.tensor(
        text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long
    ).unsqueeze(0)
    attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
    return input_ids, attention_mask


messages = [{"role": "user", "content": f"<image>\n{prompt}"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc"
image = Image.open(requests.get(url, stream=True).raw)
image_tensor = processor(images=image, return_tensors="pt")["pixel_values"][0]

input_ids, attention_mask = process_text_input(text, tokenizer)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
output_ids = model.generate(
    input_ids,
    attention_mask=attention_mask,
    images=image_tensor,
    max_new_tokens=128,
    use_cache=True,
    streamer=streamer,
)
The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.
What does this PR do?
Added support for the nanoLLaVA model family (qnguyen3/nanoLLaVA).
Example of usage:
Before submitting