qwert233 opened 1 year ago
from PIL import Image
from transformers import ChineseCLIPProcessor, ChineseCLIPModel
from diffusers import StableDiffusionXLPipeline
import torch
from pathlib import Path
device = "cuda"
sdxl_model_path = "stable-diffusion-xl-base-1.0"
clip_model_root = "chinese-clip-vit-large-patch14"
clip_text_model = ChineseCLIPModel.from_pretrained(clip_model_root)
processor = ChineseCLIPProcessor.from_pretrained(clip_model_root)
clip_tokenizer = processor.tokenizer
clip_tokenizer.model_max_length = 77  # TODO: StableDiffusionXLPipeline tokenizes with padding='max_length', so model_max_length must be set explicitly (77, matching CLIP)
sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(sdxl_model_path)
sdxl_pipe.text_encoder = clip_text_model
sdxl_pipe.tokenizer = clip_tokenizer
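# note: StableDiffusionXLPipeline also has tokenizer_2 / text_encoder_2, which are left unchanged here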
sdxl_pipe.to(device)
prompt = ""  # prompt text (Chinese) goes here
image = sdxl_pipe(prompt, num_inference_steps=50, padding=False).images[0]  # note: `padding` is not a documented __call__ argument of StableDiffusionXLPipeline
image.save("img1.jpg")
This is how I wrote it. It has some flaws, but it is usable.
I'd like to ask: why did you use ChineseCLIPModel rather than ChineseCLIPTextModel? @HiddenMarkovModel
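For context, my understanding is that ChineseCLIPModel bundles the text tower, the vision tower, and the projection heads, while ChineseCLIPTextModel is just the BERT-style text tower. Below is a minimal sketch of how the two differ when extracting text features, assuming the same chinese-clip-vit-large-patch14 checkpoint as above (the example prompt is made up):

import torch
from transformers import ChineseCLIPModel, ChineseCLIPTextModel, ChineseCLIPProcessor

clip_model_root = "chinese-clip-vit-large-patch14"
processor = ChineseCLIPProcessor.from_pretrained(clip_model_root)
inputs = processor(text=["一只戴帽子的橘猫"], padding="max_length", max_length=77, return_tensors="pt")

# Full dual-tower model: text encoder + vision encoder + projection heads
full_model = ChineseCLIPModel.from_pretrained(clip_model_root)
with torch.no_grad():
    pooled = full_model.get_text_features(**inputs)  # shape (1, projection_dim): pooled and projected

# Text tower only (the vision weights are dropped with a warning when loading)
text_model = ChineseCLIPTextModel.from_pretrained(clip_model_root)
with torch.no_grad():
    hidden = text_model(**inputs).last_hidden_state  # shape (1, 77, hidden_size): per-token states

print(pooled.shape, hidden.shape)

That difference is part of why I'm asking: as far as I can tell, the pipeline's prompt encoding consumes per-token hidden states from the text encoder rather than a single pooled embedding.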