Open mexifan opened 1 month ago
My install path is on the C drive, and the model was likewise downloaded into a newly created comfyui folder in the root of C.
It is the ComfyUI folder name: for example, if your folder is called C:/comfyui123, the automatic download still writes to C:/comfyui.
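In other words, the download helper builds its target directory from a fixed install name rather than from the ComfyUI instance that is actually running. Below is a minimal sketch of the fix direction, assuming the helper in question is download_hg_model from the node's .lib.xmodel (it appears in the code pasted later in this thread) and deriving the location from ComfyUI's folder_paths.models_dir; the real implementation in the linked fix may differ:

import os
from huggingface_hub import snapshot_download
import folder_paths  # ComfyUI's model path registry

def download_hg_model(model_id: str, model_type: str) -> str:
    # Hard-coding a root such as "C:/comfyui" recreates that folder even when
    # the install is C:/comfyui123 or E:/ComfyUI-aki-v1.3.
    # Deriving it from the running instance avoids the mismatch:
    local_dir = os.path.join(folder_paths.models_dir, model_type, model_id.rsplit("/", 1)[-1])
    if not os.path.exists(local_dir):
        snapshot_download(repo_id=model_id, local_dir=local_dir)
    return local_dir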
For the error, see https://github.com/StartHua/Comfyui_CXH_joy_caption/pull/66
Fixed, working code: https://github.com/ihmily/Comfyui_CXH_joy_caption (a PR has been submitted)
My ComfyUI is also ComfyUI-aki-v1.3. I found a joy caption.py under E:\ComfyUI-aki-v1.3\custom_nodes\ComfyUI_HF_Servelress_Inference\nodes, and it contains a model download path.
For the error, see #66
Fixed, working code: https://github.com/ihmily/Comfyui_CXH_joy_caption (a PR has been submitted)
Actually, all of the earlier errors were caused by the ComfyUI_HF_Servelress_Inference plugin; several nodes were apparently being resolved to that plugin, and after deleting it the errors stopped and everything worked. But the current new version has a problem: it finishes loading the models and then does not run, only printing
E:\ComfyUI-aki-v1.3\models\clip\siglip-so400m-patch14-384
E:\ComfyUI-aki-v1.3\models\LLM\Meta-Llama-3.1-8B
We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set max_memory in to a higher value to use more memory (at your own risk). Some parameters are on the meta device because they were offloaded to the cpu.
and then it just hangs. Rolling back to the previous version makes it work again.
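For reference, the max_memory mentioned in that message is a parameter of from_pretrained when device_map="auto" is used; it caps how much of each device the loader may fill before offloading layers to the CPU. A sketch only, with placeholder sizes:

from transformers import AutoModelForCausalLM

model_path = r"E:\ComfyUI-aki-v1.3\models\LLM\Meta-Llama-3.1-8B"
text_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    max_memory={0: "20GiB", "cpu": "32GiB"},  # placeholder budgets; adjust to your GPU and system RAM
    trust_remote_code=True,
)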
Delete that plugin; you don't need it. The problem is most likely duplicated node names, as illustrated below.
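For context on why deleting it helps: ComfyUI merges every package's NODE_CLASS_MAPPINGS dictionary into a single registry, so if two installed packages export the same key, only one class is kept and a workflow may silently load the other plugin's node. A rough illustration, with assumed key names:

# __init__.py of a ComfyUI custom-node package
NODE_CLASS_MAPPINGS = {
    # If another installed package also registers "Joy_caption", ComfyUI keeps
    # only one of the two classes, so a workflow may end up running the wrong one.
    "Joy_caption": Joy_caption,
    "Joy_caption_load": Joy_caption_load,
}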
Hi, I fixed this error by changing some lines in Joy_caption_node.py.
It's my first time posting something, and it seems like I can't upload the .py file, so I've pasted the entire code below. I hope this is helpful to you guys!
from huggingface_hub import InferenceClient
from torch import nn
from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM
from pathlib import Path
import torch
import torch.amp.autocast_mode
from PIL import Image
import os
import folder_paths

from .lib.ximg import *
from .lib.xmodel import *

from model_management import get_torch_device

DEVICE = get_torch_device()


def get_torch_device():
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"There are {torch.cuda.device_count()} GPU(s) available.")
        print(f"We will use the GPU: {device}")
    else:
        device = torch.device("cpu")
        print("No GPU available, using the CPU instead.")
    return device
class JoyPipeline:
    def __init__(self):
        self.clip_model = None
        self.clip_processor = None
        self.tokenizer = None
        self.text_model = None
        self.image_adapter = None
        self.parent = None

    def clearCache(self):
        self.clip_model = None
        self.clip_processor = None
        self.tokenizer = None
        self.text_model = None
        self.image_adapter = None
class ImageAdapter(nn.Module):
    def __init__(self, input_features: int, output_features: int):
        super().__init__()
        self.linear1 = nn.Linear(input_features, output_features)
        self.activation = nn.GELU()
        self.linear2 = nn.Linear(output_features, output_features)

    def forward(self, vision_outputs: torch.Tensor):
        x = self.linear1(vision_outputs)
        x = self.activation(x)
        x = self.linear2(x)
        return x
class Joy_caption_load:

    def __init__(self):
        self.model = None
        self.pipeline = JoyPipeline()
        self.pipeline.parent = self
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (["unsloth/Meta-Llama-3.1-8B-bnb-4bit", "meta-llama/Meta-Llama-3.1-8B"],),
            }
        }

    CATEGORY = "CXH/LLM"
    RETURN_TYPES = ("JoyPipeline",)
    FUNCTION = "gen"

    def loadCheckPoint(self):
        # Clear any previously loaded models first
        if self.pipeline != None:
            self.pipeline.clearCache()

        # clip
        model_id = "google/siglip-so400m-patch14-384"
        CLIP_PATH = download_hg_model(model_id, "clip")

        clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
        clip_model = AutoModel.from_pretrained(
            CLIP_PATH,
            trust_remote_code=True
        )

        clip_model = clip_model.vision_model
        clip_model.eval()
        clip_model.requires_grad_(False)
        clip_model.to("cuda")

        # LLM
        MODEL_PATH = download_hg_model(self.model, "LLM")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
        assert isinstance(tokenizer, PreTrainedTokenizer) or isinstance(tokenizer, PreTrainedTokenizerFast), f"Tokenizer is of type {type(tokenizer)}"
        text_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", trust_remote_code=True)
        text_model.eval()

        # Image Adapter
        adapter_path = os.path.join(folder_paths.models_dir, "Joy_caption", "image_adapter.pt")
        image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size)  # ImageAdapter(clip_model.config.hidden_size, 4096)
        image_adapter.load_state_dict(torch.load(adapter_path, map_location="cpu"))
        adjusted_adapter = image_adapter  # AdjustedImageAdapter(image_adapter, text_model.config.hidden_size)
        adjusted_adapter.eval()
        adjusted_adapter.to("cuda")

        self.pipeline.clip_model = clip_model
        self.pipeline.clip_processor = clip_processor
        self.pipeline.tokenizer = tokenizer
        self.pipeline.text_model = text_model
        self.pipeline.image_adapter = adjusted_adapter

    def clearCache(self):
        if self.pipeline != None:
            self.pipeline.clearCache()

    def gen(self, model):
        if self.model == None or self.model != model or self.pipeline == None:
            self.model = model
            self.loadCheckPoint()
        return (self.pipeline,)
class Joy_caption:

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "joy_pipeline": ("JoyPipeline",),
                "image": ("IMAGE",),
                "prompt": ("STRING", {"multiline": True, "default": "A descriptive caption for this image"},),
                "max_new_tokens": ("INT", {"default": 1024, "min": 10, "max": 4096, "step": 1}),
                "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01}),
                "cache": ("BOOLEAN", {"default": False}),
            }
        }

    CATEGORY = "CXH/LLM"
    RETURN_TYPES = ("STRING",)
    FUNCTION = "gen"

    def gen(self, joy_pipeline, image, prompt, max_new_tokens, temperature, cache):
        DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

        if joy_pipeline.clip_processor == None:
            joy_pipeline.parent.loadCheckPoint()

        clip_processor = joy_pipeline.clip_processor
        tokenizer = joy_pipeline.tokenizer
        clip_model = joy_pipeline.clip_model
        image_adapter = joy_pipeline.image_adapter
        text_model = joy_pipeline.text_model

        input_image = tensor2pil(image)

        # Preprocess image
        pImge = clip_processor(images=input_image, return_tensors='pt').pixel_values
        pImge = pImge.to(DEVICE)

        # Tokenize the prompt
        prompt = tokenizer.encode(prompt, return_tensors='pt', padding=False, truncation=False, add_special_tokens=False)

        # Embed image
        with torch.amp.autocast_mode.autocast(device_type='cuda', dtype=torch.float16, enabled=True):
            vision_outputs = clip_model(pixel_values=pImge, output_hidden_states=True)
            image_features = vision_outputs.hidden_states[-2]
            embedded_images = image_adapter(image_features)
            embedded_images = embedded_images.to(DEVICE)

        # Embed prompt
        prompt_embeds = text_model.model.embed_tokens(prompt.to(DEVICE))
        assert prompt_embeds.shape == (1, prompt.shape[1], text_model.config.hidden_size), f"Prompt shape is {prompt_embeds.shape}, expected {(1, prompt.shape[1], text_model.config.hidden_size)}"
        embedded_bos = text_model.model.embed_tokens(torch.tensor([[tokenizer.bos_token_id]], device=text_model.device, dtype=torch.int64))

        # Construct prompts
        inputs_embeds = torch.cat([
            embedded_bos.expand(embedded_images.shape[0], -1, -1),
            embedded_images.to(dtype=embedded_bos.dtype),
            prompt_embeds.expand(embedded_images.shape[0], -1, -1),
        ], dim=1)

        input_ids = torch.cat([
            torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long),
            torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),
            prompt,
        ], dim=1).to(DEVICE)
        attention_mask = torch.ones_like(input_ids)

        generate_ids = text_model.generate(input_ids, inputs_embeds=inputs_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens, do_sample=True, top_k=10, temperature=temperature, suppress_tokens=None)

        # Trim off the prompt
        generate_ids = generate_ids[:, input_ids.shape[1]:]
        if generate_ids[0][-1] == tokenizer.eos_token_id:
            generate_ids = generate_ids[:, :-1]

        caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False)[0]

        r = caption.strip()
        if cache == False:
            joy_pipeline.parent.clearCache()

        return (r,)
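One detail in the code above that matters for the error report further down: torch.autocast accepts only a bare device type such as "cuda" or "cpu" for device_type, not an indexed device like "cuda:0"; passing the indexed form raises exactly the "unsupported autocast device_type" exception quoted below. A minimal check:

import torch

# Works: a bare device type string
with torch.autocast(device_type="cuda", dtype=torch.float16):
    pass

# Raises RuntimeError: User specified an unsupported autocast device_type 'cuda:0'
# with torch.autocast(device_type="cuda:0", dtype=torch.float16):
#     pass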
My path is E:\ComfyUI-aki-v1.3, but the model downloads to E:\comfyui.
ComfyUI Error Report
Error Details
Exception Message: User specified an unsupported autocast device_type 'cuda:0'
Stack Trace