Kwai-Kolors / Kolors


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0 #76

Open · matyhtf opened this issue 4 months ago

matyhtf commented 4 months ago

Environment

nvidia-smi 
Thu Jul 25 16:24:00 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.100                Driver Version: 550.100        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:01:00.0 Off |                  Off |
|  0%   46C    P8             13W /  450W |    1784MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|    0   N/A  N/A      1828      G   /usr/lib/xorg/Xorg                             92MiB |
|    0   N/A  N/A      1973      G   /usr/bin/gnome-shell                           61MiB |
|    0   N/A  N/A    257901      C   python                                       1614MiB |
+-----------------------------------------------------------------------------------------+

Error message

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 399, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
    return await self.app(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/middleware/errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/routing.py", line 756, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/routing.py", line 776, in app
    await route.handle(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/routing.py", line 297, in handle
    await self.app(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/routing.py", line 77, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/routing.py", line 72, in app
    response = await func(request)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/fastapi/routing.py", line 278, in app
    raw_response = await run_endpoint_function(
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/fastapi/routing.py", line 193, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/starlette/concurrency.py", line 42, in run_in_threadpool
    return await anyio.to_thread.run_sync(func, *args)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 859, in run
    result = context.run(func, *args)
  File "/home/swoole/workspace/Kolors/api/main.py", line 50, in imagine
    images = pipe(prompt=prompt,
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/kolors-0.1-py3.8.egg/kolors/pipelines/pipeline_stable_diffusion_xl_chatglm_256.py", line 713, in __call__
    ) = self.encode_prompt(
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/kolors-0.1-py3.8.egg/kolors/pipelines/pipeline_stable_diffusion_xl_chatglm_256.py", line 328, in encode_prompt
    output = text_encoder(
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 166, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/kolors-0.1-py3.8.egg/kolors/models/modeling_chatglm.py", line 812, in forward
    inputs_embeds = self.embedding(input_ids)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/kolors-0.1-py3.8.egg/kolors/models/modeling_chatglm.py", line 728, in forward
    words_embeddings = self.word_embeddings(input_ids)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 160, in forward
    return F.embedding(
  File "/home/swoole/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/functional.py", line 2210, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)
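The trace bottoms out in F.embedding, which fails when input_ids and the embedding weight sit on different devices. A minimal diagnostic sketch, using the attribute path shown in the traceback (text_encoder.embedding.word_embeddings); the helper name is ours:

def report_devices(pipe, tokenizer, prompt="test"):
    # The tokenized prompt normally starts out on cpu.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # The ChatGLM embedding table; under accelerate's offload hooks its
    # device can change between calls.
    weight = pipe.text_encoder.embedding.word_embeddings.weight
    print("input_ids:", input_ids.device, "| embedding weight:", weight.device)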
matyhtf commented 4 months ago

This happens very frequently, roughly 20% of the time. Reverting to an older version works fine.

commit a2c759409fe793ff3fd9f93cab5d63729191168c (master)
Author: lizhuang <lizhuang@stumail.neu.edu.cn>
Date:   Fri Jul 5 23:42:11 2024 +0800

    Update README_CN.md

The latest commits may have introduced a bug.
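One thing worth ruling out (an editor's assumption, not something confirmed in the thread): the traceback loads kolors from an installed egg (kolors-0.1-py3.8.egg under site-packages), so reverting the git checkout only changes behavior once the package is reinstalled. A quick check of which copy is actually imported:

import kolors
print(kolors.__file__)  # should point at the commit you think you are running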

junqiangwu commented 3 months ago

This happens very frequently, roughly 20% of the time. Reverting to an older version works fine.

commit a2c759409fe793ff3fd9f93cab5d63729191168c (master)
Author: lizhuang <lizhuang@stumail.neu.edu.cn>
Date:   Fri Jul 5 23:42:11 2024 +0800

    Update README_CN.md

The latest commits may have introduced a bug.

Which Python script did you run?

matyhtf commented 3 months ago

Python code

The code is copied from scripts/sample.py; only the parameters are received over HTTP via FastAPI.

from fastapi import FastAPI, Form
from fastapi.staticfiles import StaticFiles

import os, torch
import signal
import random
from datetime import datetime
# from PIL import Image
from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from diffusers import UNet2DConditionModel, AutoencoderKL
from diffusers import EulerDiscreteScheduler
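
# The endpoint below sends SIGUSR1 to its own process after a CUDA OOM;
# exiting here lets the gunicorn master (started with -w 1) respawn a
# fresh worker with a clean CUDA context.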

def handler(signum, frame):
    os._exit(0)

signal.signal(signal.SIGUSR1, handler)

root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
app = FastAPI()
app.mount("/static", StaticFiles(directory=f'{root_dir}/outputs'), name="static")

ckpt_dir = f'{root_dir}/weights/Kolors'
text_encoder = ChatGLMModel.from_pretrained(
    f'{ckpt_dir}/text_encoder',
    torch_dtype=torch.float16).half()
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half()
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half()
pipe = StableDiffusionXLPipeline(
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        unet=unet,
        scheduler=scheduler,
        force_zeros_for_empty_prompt=False)
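# NOTE: the next two calls are combined: .to("cuda") first moves every
# submodule to the GPU, then enable_model_cpu_offload() installs accelerate
# hooks that move submodules between cpu and cuda:0 around each forward pass.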
pipe = pipe.to("cuda")
pipe.enable_model_cpu_offload()

@app.post("/imagine")
def imagine(prompt: str = Form(), width: int = Form(default=1024), height: int = Form(default=1024), n: int = Form(default=4)):
    random_number = random.randint(1, 2 << 30)
    now = datetime.now()
    formatted_date_time = now.strftime('%Y%m%d%H%M')

    n = min(n, 4)
    width = min(width, 1280)
    height = min(height, 1280)

    try:
        images = pipe(prompt=prompt,
                            height=height,
                            width=width,
                            num_inference_steps=50,
                            guidance_scale=5.0,
                            num_images_per_prompt=n,
                            generator= torch.Generator(pipe.device).manual_seed(random_number)).images
    except torch.cuda.OutOfMemoryError as e:
        os.kill(os.getpid(), signal.SIGUSR1)
        return {'data': None, 'code': 5000, 'message': f"Error: {e}"}

    outputs = []
    for i in range(n):
        image = images[i]
        file = f'{formatted_date_time}-{random_number}-{i}.png'
        image.save(f'{root_dir}/outputs/{file}')
        outputs.append(file)

    return {'data': {'images': outputs, 'seed': random_number}, 'code': 0}

Run with gunicorn using the uvicorn worker class

gunicorn -w 1 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9380 main:app
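
The script above both moves the pipeline to CUDA and enables model CPU offload. The diffusers offloading docs treat enable_model_cpu_offload() as an alternative to pipe.to("cuda"), not an addition to it, so the intermittent cpu/cuda:0 mismatch may come from that combination. A sketch of the two mutually exclusive setups (an editor's suggestion, not a fix confirmed in this thread):

# Option A: keep every submodule resident on the GPU; no offload hooks.
pipe = pipe.to("cuda")

# Option B: leave the pipeline on CPU and let accelerate move each model to
# the GPU only for its forward pass; do not call pipe.to("cuda") beforehand.
pipe.enable_model_cpu_offload()

With option B it is also safer to seed a CPU generator, e.g. torch.Generator(device="cpu").manual_seed(seed), since pipe.device may not report cuda:0 at call time.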