Closed Andrea-Dispe closed 7 months ago
This is happening to me very frequently on my 7900xt now which is a 20GB card. In the past iterations of this build I've been able to create 1280x720 photos somewhat reliably, I am now running out of memory trying to create 1000x600 images and even lower at times. It's deeply frustrating.
Commandline args I'm running are: --opt-sub-quad-attention --medvram --disable-nan-check --autolaunch
I get this error very frequently with the new version. The previous version was very stable.
I updated yesterday and am also running into this error. Before I upgraded I could upscale about 3 out of 4 times without issues (well, I had to restart sometimes), but now I can't do it at all. I'm running on an RX 6700, Ryzen 3600, 32GB DDR4 with --medvram --precision full --no-half --no-half-vae --opt-sub-quad-attention --disable-nan-check
Model loaded in 7.0s (load weights from disk: 1.0s, create model: 0.5s, apply weights to model: 4.9s, apply half(): 0.3s, move model to device: 0.1s, calculate empty prompt: 0.1s).
20%|█████████████████████████████████████ | 4/20 [00:16<01:05, 4.12s/it]
*** Error completing request██████████████████████████▊ | 4/20 [00:10<00:47, 2.97s/it]
*** Arguments: ('task(o3rw8y5j5ti5c3y)', 'cat', '', [], 20, 'DPM++ 2M Karras', 1, 1, 7, 512, 512, False, 0.7, 2, 'Latent', 0, 0, 0, 'Use same checkpoint', 'Use same sampler', '', '', [], <gradio.routes.Request object at 0x000001D385B15D50>, 0, False, '', 0.8, -1, False, -1, 0, 0, 0, False, False, 'positive', 'comma', 0, False, False, '', 1, '', [], 0, '', [], 0, '', [], True, False, False, False, 0, False) {}
Traceback (most recent call last):
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\call_queue.py", line 57, in f
res = list(func(*args, **kwargs))
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\call_queue.py", line 36, in f
res = func(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\txt2img.py", line 64, in txt2img
processed = processing.process_images(p)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\processing.py", line 733, in process_images
res = process_images_inner(p)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\processing.py", line 871, in process_images_inner
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\processing.py", line 1144, in sample
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_samplers_kdiffusion.py", line 238, in sample
samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_samplers_common.py", line 261, in launch_sampling
return func()
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_samplers_kdiffusion.py", line 238, in <lambda>
samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\k-diffusion\k_diffusion\sampling.py", line 594, in sample_dpmpp_2m
denoised = model(x, sigmas[i] * s_in, **extra_args)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_samplers_cfg_denoiser.py", line 169, in forward
x_out = self.inner_model(x_in, sigma_in, cond=make_condition_dict(cond_in, image_cond_in))
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\k-diffusion\k_diffusion\external.py", line 112, in forward
eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\k-diffusion\k_diffusion\external.py", line 138, in get_eps
return self.inner_model.apply_model(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_utils.py", line 17, in <lambda>
setattr(resolved_obj, func_path[-1], lambda *args, **kwargs: self(*args, **kwargs))
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_utils.py", line 28, in __call__
return self.__orig_func(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\models\diffusion\ddpm.py", line 858, in apply_model
x_recon = self.model(x_noisy, t, **cond)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\models\diffusion\ddpm.py", line 1335, in forward
out = self.diffusion_model(x, t, context=cc)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_unet.py", line 91, in UNetModel_forward
return ldm.modules.diffusionmodules.openaimodel.copy_of_UNetModel_forward_for_webui(self, x, timesteps, context, *args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\diffusionmodules\openaimodel.py", line 802, in forward
h = module(h, emb, context)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\diffusionmodules\openaimodel.py", line 84, in forward
x = layer(x, context)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\attention.py", line 334, in forward
x = block(x, context=context[i])
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\attention.py", line 269, in forward
return checkpoint(self._forward, (x, context), self.parameters(), self.checkpoint)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\diffusionmodules\util.py", line 121, in checkpoint
return CheckpointFunction.apply(func, len(inputs), *args)
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\autograd\function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\diffusionmodules\util.py", line 136, in forward
output_tensors = ctx.run_function(*ctx.input_tensors)
File "C:\Users\testUser\stable-diffusion-webui-directml\repositories\stable-diffusion-stability-ai\ldm\modules\attention.py", line 272, in _forward
x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x
File "C:\Users\testUser\stable-diffusion-webui-directml\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_optimizations.py", line 393, in split_cross_attention_forward_invokeAI
r = einsum_op(q, k, v)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_optimizations.py", line 367, in einsum_op
return einsum_op_dml(q, k, v)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_optimizations.py", line 354, in einsum_op_dml
return einsum_op_tensor_mem(q, k, v, (mem_reserved - mem_active) if mem_reserved > mem_active else 1)
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_optimizations.py", line 336, in einsum_op_tensor_mem
return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
File "C:\Users\testUser\stable-diffusion-webui-directml\modules\sd_hijack_optimizations.py", line 308, in einsum_op_slice_1
r[:, i:end] = einsum_op_compvis(q[:, i:end], k, v)
RuntimeError: Could not allocate tensor with 4915840 bytes. There is not enough GPU video memory available!
I guess I will look for different command-line arguments and see if something works now; I created over 100 images before I updated.
Edit: I'm so sorry, I think the issue is something else: I'm hitting the "fatal: No names found, cannot describe anything." bug, which I hadn't noticed before. // Update: that was not the issue.
Edit 2: I tried resetting and reinstalling everything; the upscaler still crashes.
Same for me. First-time user. I tried a lot of things and it still doesn't work; I have never been able to generate a single image. Maybe I should just give up and replace the bloody AMD card with an Nvidia one.
Using these parameters: --opt-sub-quad-attention --no-half-vae --disable-nan-check --medvram
With an 8GB 6600 I can generate up to 960x960 (very slow, not practical), and I generate 512x768 or 768x768 daily and then upscale by up to 4x. It used to be difficult to keep this up over many generations without running out of memory, but over these last months it has become the norm; apart from the speed drop with recent versions, the generation itself is rather stable. (When generating 512x768 I used to get around 2.2 s/it with a 6600 limited to 40 W; now it is 4.x s/it no matter what I do, BUT on the same machine with the same specs ComfyUI generates at around 2.x s/it.)
TL;DR: use the correct parameters, and while generating avoid heavy video playback and other apps that use the GPU heavily. That is enough, to a certain degree.
just install hackintosh and use coreml there.
I simply fixed it by installing AUTOMATIC1111's branch instead. No need to muck around with parameters; it just works straight after installation. Perfect.
Hey there folks of github! I seem to be having a similar issue with my 6700(nonxt). My card seems to only be able to generate basic SD or standard SDXL up to about 1024x1024 with lowvram arg in webui. I don't mind the rather slow speed but I feel like I'm doing something wrong here as my vram maxes out rather quickly even with 10GB. Is this just the plight of the current state of AMD on windows? I'm kinda new here so I'm sorry if this isn't the place to add to the conversation, but I didn't wanna just make a whole new post if one already existed.
--no-half-vae fixed my issue.
I had a very similar experience. Can anyone publish the default settings in effect at the end of the installation? I'm using a 5700XT 8GB (win10). Everything works great after installation. I changed a setting about the face corrector. When I applied the changes, the pop-up showed me a lot of setting changes that I hadn't made. No problem, but since then I've been unable to generate a new image: it crashes for lack of vram. Before going into all the little details of optimization, I'd like to find all the settings as they were at the end of the installation. I have no trace of them, as I wasn't suspicious at the time and hadn't yet saved any settings. Is it among the json files in the github repository? Kind regards. RuntimeError: Could not allocate tensor with 4915840 bytes. There is not enough GPU video memory available!
Edit: The arguments --opt-sub-quad-attention --no-half-vae --disable-nan-check --medvram solved the problem. The VRAM is saturated but the generation doesn't crash. Taken separately, these arguments didn't solve the problem. 20 sec for a 512px image at 20 steps (with the original A1111 on an i7-6700 CPU it was 40 sec, well done!). Nevertheless, I didn't have these arguments after installation and it worked fine then. I'd still like to restore the original settings to investigate.
I had a very similar experience. Can anyone publish the default settings in effect at the end of the installation? I'm using a 5700XT 8GB (win10). Everything works great after installation. I changed a setting about the face corrector. When I applied the changes, the pop-up showed me a lot of setting changes that I hadn't made. No problem, but since then I've been unable to generate a new image: it crashes for lack of vram. Before going into all the little details of optimization, I'd like to find all the settings as they were at the end of the installation. I have no trace of them, as I wasn't suspicious at the time and hadn't yet saved any settings. Is it among the json files in the github repository? Kind regards. RuntimeError: Could not allocate tensor with 4915840 bytes. There is not enough GPU video memory available!
Edit: The arguments --opt-sub-quad-attention --no-half-vae --disable-nan-check --medvram solved the problem. The VRAM is saturated but the generation doesn't crash. Taken separately, these arguments didn't solve the problem. 20 sec for a 512px image at 20 steps (with the original A1111 on an i7-6700 CPU it was 40 sec, well done!). Nevertheless, I didn't have these arguments after installation and it worked fine then. I'd still like to restore the original settings to investigate.
Just delete config.json and re-launch webui. Then settings would be default.
I simply fixed it by installing AUTOMATIC1111's branch instead. No need to muck around with parameters; it just works straight after installation. Perfect.
Isn't that only for Linux? A1111 has no official support for AMD GPUs on Windows, which is the purpose of this entire branch.
Same for me: I tried the other suggestions and still no luck.
"venv "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\venv\Scripts\Python.exe"
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
Python 3.10.6 (tags/v3.10.6:9c7b4bd, Aug 1 2022, 21:53:49) [MSC v.1932 64 bit (AMD64)]
Version: 1.6.1
Commit hash:
To create a public link, set `share=True` in `launch()`.
Creating model from config: C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\configs\v1-inference.yaml
Startup time: 10.5s (prepare environment: 0.4s, import torch: 3.0s, import gradio: 1.0s, setup paths: 0.8s, initialize shared: 1.5s, other imports: 0.6s, setup codeformer: 0.1s, setup gfpgan: 0.2s, load scripts: 1.5s, create ui: 0.5s, gradio launch: 1.0s).
Applying attention optimization: sub-quadratic... done.
Model loaded in 52.5s (load weights from disk: 1.3s, create model: 0.5s, apply weights to model: 49.4s, apply half(): 0.6s, calculate empty prompt: 0.6s).
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [01:04<00:00, 3.24s/it]
Error completing request███████████████████████████████████████████████████████████| 20/20 [00:58<00:00, 3.21s/it]
Arguments: ('task(1tsku9gf5xqop8o)', 'moon', '', [], 20, 'DPM++ 2M Karras', 1, 1, 7, 512, 512, False, 0.7, 2, 'Latent', 0, 0, 0, 'Use same checkpoint', 'Use same sampler', '', '', [], <gradio.routes.Request object at 0x00000204EF0FDFC0>, 0, False, '', 0.8, -1, False, -1, 0, 0, 0, False, False, 'positive', 'comma', 0, False, False, '', 1, '', [], 0, '', [], 0, '', [], True, False, False, False, 0, False) {}
Traceback (most recent call last):
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\call_queue.py", line 57, in f
res = list(func(*args, **kwargs))
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\call_queue.py", line 36, in f
res = func(*args, **kwargs)
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\txt2img.py", line 64, in txt2img
processed = processing.process_images(p)
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\processing.py", line 733, in process_images
res = process_images_inner(p)
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\processing.py", line 879, in process_images_inner
x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\processing.py", line 594, in decode_latent_batch
sample = decode_first_stage(model, batch[i:i + 1])[0]
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\sd_samplers_common.py", line 76, in decode_first_stage
return samples_to_images_tensor(x, approx_index, model)
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\sd_samplers_common.py", line 58, in samples_to_images_tensor
x_sample = model.decode_first_stage(sample.to(model.first_stage_model.dtype))
File "C:\Users\Admin\Desktop\vtube\stable-diffusion-webui-directml-master\modules\sd_hijack_utils.py", line 17, in <lambda>
"
I've fixed mine by doing
"@echo off
set PYTHON=
set GIT=
set VENV_DIR=
set COMMANDLINE_ARGS=--medvram --backend directml --no-half --precision full --opt-sub-quad-attention --opt-split-attention-v1 --disable-nan-check --theme dark --autolaunch
REM Set GPU environment variables
set CUDA_VISIBLE_DEVICES=0
REM Set the GPU device you want to use
set GRADIO_GPU_LIMIT=0.8
REM Set the GPU memory limit to 80% of your total VRAM
call webui.bat "
Is there an existing issue for this?
What happened?
I installed and ran SD following the guide https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs from webui-user.bat and it opens without problems.
The problem is that as soon as I try to generate an image from a simple prompt like "cat", I can see in Task Manager that GPU utilization reaches 100% within 2 seconds, and then the webui returns the error:
RuntimeError: Could not allocate tensor with 4915840 bytes. There is not enough GPU video memory available!
Steps to reproduce the problem
What should have happened?
Image should have generated normally
Sysinfo
What browsers do you use to access the UI ?
No response
Console logs
Additional information
No response