Hi, I created a LLaVA model with only 12 layers (instead of 32). However, evaluation on TextVQA runs about two times slower than with the larger 7-billion-parameter LLaVA.
Code to create the new, smaller LLaVA model:
import argparse
import torch
import os
import json
from tqdm import tqdm
import shortuuid
import numpy as np
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from torch.utils.data import Dataset, DataLoader
from llava.train.llava_trainer import LLaVATrainer
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
from llava.model import *
model_path = 'liuhaotian/llava-v1.5-7b'
## Using the config.json for liuhaotian/llava-v1.5-7b
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
config = AutoConfig.from_pretrained(model_path)
## Shrink the decoder from 32 to 12 hidden layers
config.update({'num_hidden_layers': 12})
model_generator = LlavaLlamaForCausalLM
# Create a (randomly initialized) model from the edited config
model = model_generator._from_config(config=config)
print(model)
## Save the model; extracted from train.py
trainer = LLaVATrainer(model=model, tokenizer=tokenizer)
output_dir = '/LLaVA/checkpoints/llava_mini'
#state_dict = trainer.model.state_dict()
state_dict = {}
for k, v in model.named_parameters():
    state_dict[k] = v
# Move weights to CPU and drop the vision tower before saving
cpu_state_dict = {
    key: value.cpu()
    for key, value in state_dict.items() if 'vision_tower' not in key
}
del state_dict
trainer._save(output_dir, state_dict=cpu_state_dict)
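As a sanity check (a minimal sketch reusing only the objects created above), the layer count and parameter count of the truncated model can be verified before running any evaluation:

# Sanity check: confirm the edited config took effect and count parameters
print(model.config.num_hidden_layers)            # expected: 12
n_params = sum(p.numel() for p in model.parameters())
print(f'{n_params / 1e9:.2f}B parameters')       # should be well under 7B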
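To narrow down where the time goes, here is a rough sketch (not the actual TextVQA pipeline; it skips images entirely, so it only exercises the 12-layer decoder). It reloads the saved checkpoint with the load_pretrained_model helper imported above and times plain text decoding, assuming a single CUDA device:

import time

ckpt = '/LLaVA/checkpoints/llava_mini'
# Reload the 12-layer checkpoint the same way the evaluation scripts do
tokenizer, model, image_processor, context_len = load_pretrained_model(
    ckpt, None, get_model_name_from_path(ckpt))

# Time text-only generation to measure raw decoder speed
inputs = tokenizer('Describe the image in detail.', return_tensors='pt').to(model.device)
with torch.inference_mode():
    model.generate(**inputs, max_new_tokens=8)              # warm-up
    torch.cuda.synchronize()
    start = time.time()
    model.generate(**inputs, max_new_tokens=64, min_new_tokens=64)
    torch.cuda.synchronize()
print(f'{(time.time() - start) / 64 * 1000:.1f} ms per generated token')

Running the same snippet against liuhaotian/llava-v1.5-7b would give a direct per-token comparison between the 12-layer and 32-layer decoders.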
The evaluation code: