Open smile0655 opened 1 year ago
腾讯云a800 镜像:gn-images.tencentcloudcr.com/ngc/pytorch-21.07:ssh-ib5.3-config-tccl-py3
python==3.8.10 cuda==11.4 onediff==0.9.0 diffusers==0.15.0 transformers==4.27.1 torch==1.10.0a0+ecc3718 diffusion-benchmark(git_commit)=7d3d0d727eac91a6ca5da00259f37323bfdf64a8 oneflow-diffusers(git_commit)=b50952f87485aa47f588493dc38dd6810c2b9637 root@a800-026:/sd_test# python -m oneflow --doctor path: ['/opt/conda/lib/python3.8/site-packages/oneflow'] version: 0.9.1.dev20230509+cu117 git_commit: b4d2f87 cmake_build_type: Release rdma: True mlir: True
在测试stable diffusion脚本时遇到error,脚本地址:https://github.com/Oneflow-Inc/OneAutoTest/blob/megatron_script_huoshan/onebench/diffusers/run_benchmark.sh 该测试脚本在火山引擎机器上运行正常。 具体运行的python代码:
"""Repeatedly run Stable Diffusion 2 through the onediff pipeline and save the images."""
import os

# OneFlow MLIR / kernel switches, each set to "1". The assignments are kept
# ahead of the `oneflow` import, preserving the original statement order.
_ONEFLOW_FLAGS = (
    "ONEFLOW_MLIR_CSE",
    "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION",
    "ONEFLOW_MLIR_ENABLE_ROUND_TRIP",
    "ONEFLOW_MLIR_FUSE_FORWARD_OPS",
    "ONEFLOW_MLIR_GROUP_MATMUL",
    "ONEFLOW_MLIR_PREFER_NHWC",
    "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS",
    "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR",
    "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
    "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL",
    "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION",
    "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION",
)
for _flag in _ONEFLOW_FLAGS:
    os.environ[_flag] = "1"

import click
import oneflow as torch

# Enabled before diffusers/onediff are imported, as in the original ordering.
torch.mock_torch.enable()

from diffusers import EulerDiscreteScheduler
from onediff import OneFlowStableDiffusionPipeline as StableDiffusionPipeline
from pathlib import Path


@click.command()
@click.option("--token")
@click.option("--prompt", default="a photo of an astronaut riding a horse on mars")
@click.option("--repeat", default=32)
@click.option("--output", default="output")
@click.option("--height", default=768)
@click.option("--width", default=768)
def benchmark(token, prompt, repeat, output, height, width):
    """Generate images for ``prompt`` ``repeat`` times and write them as PNG files.

    Args:
        token: Value of ``--token``; accepted but not used by this function.
        prompt: Text prompt passed to the pipeline on every round.
        repeat: Number of pipeline invocations.
        output: Base directory; images go into ``<output>/stable_diffusion_2``.
        height: Requested image height passed to the pipeline.
        width: Requested image width passed to the pipeline.
    """
    repo = "stabilityai/stable-diffusion-2"
    pipe = StableDiffusionPipeline.from_pretrained(
        repo,
        scheduler=EulerDiscreteScheduler.from_pretrained(repo, subfolder="scheduler"),
        revision="fp16",
        torch_dtype=torch.float16,
    ).to("cuda")

    target = Path(output) / "stable_diffusion_2"
    target.mkdir(parents=True, exist_ok=True)

    # The command text never changes, so it is built once; it is still
    # executed at the start of every round, before the pipeline call.
    gpu_query = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"

    for r in range(repeat):
        os.system(gpu_query)
        result = pipe(prompt, height=height, width=width)
        # File names encode the round index and the image index within it.
        for i, image in enumerate(result.images):
            image.save(target / f"{r:03d}-{i:02d}.png")


if __name__ == "__main__":
    benchmark()
经排查,发现是在import onediff的时候出现这个error:
腾讯云a800 镜像:gn-images.tencentcloudcr.com/ngc/pytorch-21.07:ssh-ib5.3-config-tccl-py3
在测试stable diffusion脚本时遇到error,脚本地址:https://github.com/Oneflow-Inc/OneAutoTest/blob/megatron_script_huoshan/onebench/diffusers/run_benchmark.sh 该测试脚本在火山引擎机器上运行正常。 具体运行的python代码:
经排查,发现是在import onediff的时候出现这个error: