smile0655 / test

0 stars 0 forks source link

腾讯云stable diffusion #11

Open smile0655 opened 1 year ago

smile0655 commented 1 year ago

腾讯云a800 镜像:gn-images.tencentcloudcr.com/ngc/pytorch-21.07:ssh-ib5.3-config-tccl-py3

python==3.8.10
cuda==11.4

onediff==0.9.0
diffusers==0.15.0
transformers==4.27.1
torch==1.10.0a0+ecc3718
diffusion-benchmark(git_commit)=7d3d0d727eac91a6ca5da00259f37323bfdf64a8
oneflow-diffusers(git_commit)=b50952f87485aa47f588493dc38dd6810c2b9637

root@a800-026:/sd_test# python -m oneflow --doctor 
path: ['/opt/conda/lib/python3.8/site-packages/oneflow']
version: 0.9.1.dev20230509+cu117
git_commit: b4d2f87
cmake_build_type: Release
rdma: True
mlir: True

在测试 stable diffusion 脚本时遇到 error,脚本地址:https://github.com/Oneflow-Inc/OneAutoTest/blob/megatron_script_huoshan/onebench/diffusers/run_benchmark.sh 。(image)该测试脚本在火山引擎机器上运行正常。具体运行的 python 代码:

import os

# Switch on every OneFlow flag used by this benchmark by setting it to "1".
# NOTE(review): this block sits above `import oneflow` in the script;
# presumably the flags have to be in the environment before that import runs,
# so keep it there — confirm against the OneFlow docs.
os.environ.update(
    dict.fromkeys(
        (
            # ONEFLOW_MLIR_* switches
            "ONEFLOW_MLIR_CSE",
            "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION",
            "ONEFLOW_MLIR_ENABLE_ROUND_TRIP",
            "ONEFLOW_MLIR_FUSE_FORWARD_OPS",
            "ONEFLOW_MLIR_GROUP_MATMUL",
            "ONEFLOW_MLIR_PREFER_NHWC",
            # ONEFLOW_KERNEL_ENABLE_FUSED_* switches
            "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS",
            "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR",
            # ONEFLOW_KERNEL_CONV_* (cutlass) switches
            "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
            "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL",
            # *_ALLOW_HALF_PRECISION_ACCUMULATION switches
            "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION",
            "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION",
        ),
        "1",
    )
)

import click
import oneflow as torch
torch.mock_torch.enable()
from diffusers import EulerDiscreteScheduler
from onediff import OneFlowStableDiffusionPipeline as StableDiffusionPipeline

from pathlib import Path

# Command-line benchmark: loads the "stabilityai/stable-diffusion-2" pipeline
# once, then generates images for `prompt` `repeat` times at `height` x `width`,
# printing an nvidia-smi CSV report before every run and saving each image as
# <output>/stable_diffusion_2/<run>-<image>.png.
# `token` is accepted on the command line but is not used by the body.
# Comments are used here instead of a docstring on purpose: click would show a
# docstring as the command's --help text, which would change the CLI output.
@click.command()
@click.option("--token")
@click.option("--prompt", default="a photo of an astronaut riding a horse on mars")
@click.option("--repeat", default=32)
@click.option("--output", default="output")
@click.option("--height", default=768)
@click.option("--width", default=768)
def benchmark(token, prompt, repeat, output, height, width):
    checkpoint = "stabilityai/stable-diffusion-2"
    # The query never changes between runs, so build the command line once.
    gpu_report_cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"

    # Build the fp16 pipeline with an Euler discrete scheduler and move it to the GPU.
    euler = EulerDiscreteScheduler.from_pretrained(checkpoint, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(
        checkpoint,
        scheduler=euler,
        revision="fp16",
        torch_dtype=torch.float16,
    ).to("cuda")

    # Output directory is created only after the model has loaded successfully.
    save_root = Path(output) / "stable_diffusion_2"
    save_root.mkdir(parents=True, exist_ok=True)

    for run_idx in range(repeat):
        # Print the GPU utilisation/memory report ahead of each generation.
        os.system(gpu_report_cmd)
        generated = pipe(prompt, height=height, width=width).images
        for img_idx, picture in enumerate(generated):
            picture.save(save_root / f"{run_idx:03d}-{img_idx:02d}.png")

# Run the benchmark only when this file is executed as a script (not when it
# is imported). `benchmark` is decorated with @click.command(), so it is
# called without arguments here.
if __name__ == "__main__":
    benchmark()

经排查,发现是在import onediff的时候出现这个error: image