Linaqruf / kohya-trainer

Adapted from https://note.com/kohya_ss/n/nbf7ce8d80f29 for easier cloning
Apache License 2.0
1.86k stars 306 forks source link

CUDA backend failed to initialize: Found CUDA version 12010(help pls) #340

Open holyaazel opened 8 months ago

holyaazel commented 8 months ago

I've tried to fix this myself, but nothing seems to work for me.

CUDA backend failed to initialize: Found CUDA version 12010, but JAX was built against version 12020, which is newer. The copy of CUDA that is installed must be at least as new as the version against which JAX was built. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.) ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /content/kohya-trainer/train_network.py:16 in │ │ │ │ 13 from tqdm import tqdm │ │ 14 import torch │ │ 15 from accelerate.utils import set_seed │ │ ❱ 16 from diffusers import DDPMScheduler │ │ 17 │ │ 18 import library.train_util as train_util │ │ 19 from library.train_util import ( │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/init.py:39 in │ │ │ │ 36 │ │ get_scheduler, │ │ 37 │ ) │ │ 38 │ from .pipeline_utils import DiffusionPipeline │ │ ❱ 39 │ from .pipelines import ( │ │ 40 │ │ DanceDiffusionPipeline, │ │ 41 │ │ DDIMPipeline, │ │ 42 │ │ DDPMPipeline, │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/init.py:42 in │ │ │ │ 39 except OptionalDependencyNotAvailable: │ │ 40 │ from ..utils.dummy_torch_and_transformers_objects import * # noqa F403 │ │ 41 else: │ │ ❱ 42 │ from .alt_diffusion import AltDiffusionImg2ImgPipeline, AltDiffusionPipeline │ │ 43 │ from .latent_diffusion import LDMTextToImagePipeline │ │ 44 │ from .paint_by_example import PaintByExamplePipeline │ │ 45 │ from .stable_diffusion import ( │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/alt_diffusion/init.py:33 in │ │ │ │ │ │ 30 │ │ 31 if is_transformers_available() and is_torch_available(): │ │ 32 │ from .modeling_roberta_series import RobertaSeriesModelWithTransformation │ │ ❱ 33 │ from .pipeline_alt_diffusion import AltDiffusionPipeline │ │ 34 │ from .pipeline_alt_diffusion_img2img import AltDiffusionImg2ImgPipeline │ │ 35 │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion │ │ .py:36 in │ │ │ │ 33 │ PNDMScheduler, │ │ 34 ) │ 
│ 35 from ...utils import deprecate, logging │ │ ❱ 36 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker │ │ 37 from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation │ │ 38 │ │ 39 │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/stable_diffusion/init.py:40 in │ │ │ │ │ │ 37 │ │ 38 │ │ 39 if is_transformers_available() and is_torch_available(): │ │ ❱ 40 │ from .pipeline_cycle_diffusion import CycleDiffusionPipeline │ │ 41 │ from .pipeline_stable_diffusion import StableDiffusionPipeline │ │ 42 │ from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline │ │ 43 │ from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline │ │ │ │ /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_cycle_diff │ │ usion.py:24 in │ │ │ │ 21 import PIL │ │ 22 from diffusers.utils import is_accelerate_available │ │ 23 from packaging import version │ │ ❱ 24 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer │ │ 25 │ │ 26 from ...configuration_utils import FrozenDict │ │ 27 from ...models import AutoencoderKL, UNet2DConditionModel │ │ in _handle_fromlist:1075 │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py:1101 in getattr │ │ │ │ 1098 │ │ │ value = self._get_module(name) │ │ 1099 │ │ elif name in self._class_to_module.keys(): │ │ 1100 │ │ │ module = self._get_module(self._class_to_module[name]) │ │ ❱ 1101 │ │ │ value = getattr(module, name) │ │ 1102 │ │ else: │ │ 1103 │ │ │ raise AttributeError(f"module {self.name} has no attribute {name}") │ │ 1104 │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py:1100 in getattr │ │ │ │ 1097 │ │ if name in self._modules: │ │ 1098 │ │ │ value = self._get_module(name) │ │ 1099 │ │ elif name in self._class_to_module.keys(): │ │ ❱ 1100 │ │ │ module = self._get_module(self._class_to_module[name]) │ │ 1101 │ │ │ value = 
getattr(module, name) │ │ 1102 │ │ else: │ │ 1103 │ │ │ raise AttributeError(f"module {self.name} has no attribute {name}") │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py:1110 in _get_module │ │ │ │ 1107 │ │ │ 1108 │ def _get_module(self, module_name: str): │ │ 1109 │ │ try: │ │ ❱ 1110 │ │ │ return importlib.import_module("." + module_name, self.name) │ │ 1111 │ │ except Exception as e: │ │ 1112 │ │ │ raise RuntimeError( │ │ 1113 │ │ │ │ f"Failed to import {self.name}.{module_name} because of the followin │ │ │ │ /usr/lib/python3.10/importlib/init.py:126 in import_module │ │ │ │ 123 │ │ │ if character != '.': │ │ 124 │ │ │ │ break │ │ 125 │ │ │ level += 1 │ │ ❱ 126 │ return _bootstrap._gcd_import(name[level:], package, level) │ │ 127 │ │ 128 │ │ 129 _RELOADING = {} │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/models/clip/modeling_clip.py:676 in │ │ │ │ │ │ 673 │ │ ) │ │ 674 │ │ 675 │ │ ❱ 676 class CLIPTextTransformer(nn.Module): │ │ 677 │ def init(self, config: CLIPTextConfig): │ │ 678 │ │ super().init() │ │ 679 │ │ self.config = config │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/models/clip/modeling_clip.py:687 in │ │ CLIPTextTransformer │ │ │ │ 684 │ │ │ 685 │ @add_start_docstrings_to_model_forward(CLIP_TEXT_INPUTS_DOCSTRING) │ │ 686 │ @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=CLIP │ │ ❱ 687 │ def forward( │ │ 688 │ │ self, │ │ 689 │ │ input_ids: Optional[torch.Tensor] = None, │ │ 690 │ │ attention_mask: Optional[torch.Tensor] = None, │ │ │ │ /usr/local/lib/python3.10/dist-packages/transformers/utils/doc.py:1152 in docstring_decorator │ │ │ │ 1149 │ │ func_doc = fn.doc │ │ 1150 │ │ lines = func_doc.split("\n") │ │ 1151 │ │ i = 0 │ │ ❱ 1152 │ │ while i < len(lines) and re.search(r"^\sReturns?:\s$", lines[i]) is None: │ │ 1153 │ │ │ i += 1 │ │ 1154 │ │ if i < len(lines): │ │ 1155 │ │ │ indent = len(_get_indent(lines[i])) │ │ │ │ /usr/lib/python3.10/re.py:200 
in search │ │ │ │ 197 def search(pattern, string, flags=0): │ │ 198 │ """Scan through string looking for a match to the pattern, returning │ │ 199 │ a Match object, or None if no match was found.""" │ │ ❱ 200 │ return _compile(pattern, flags).search(string) │ │ 201 │ │ 202 def sub(pattern, repl, string, count=0, flags=0): │ │ 203 │ """Return the string obtained by replacing the leftmost │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ KeyboardInterrupt ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /usr/lib/python3.10/subprocess.py:1209 in wait │ │ │ │ 1206 │ │ if timeout is not None: │ │ 1207 │ │ │ endtime = _time() + timeout │ │ 1208 │ │ try: │ │ ❱ 1209 │ │ │ return self._wait(timeout=timeout) │ │ 1210 │ │ except KeyboardInterrupt: │ │ 1211 │ │ │ # https://bugs.python.org/issue25942 │ │ 1212 │ │ │ # The first keyboard interrupt waits briefly for the child to │ │ │ │ /usr/lib/python3.10/subprocess.py:1959 in _wait │ │ │ │ 1956 │ │ │ │ │ with self._waitpid_lock: │ │ 1957 │ │ │ │ │ │ if self.returncode is not None: │ │ 1958 │ │ │ │ │ │ │ break # Another thread waited. │ │ ❱ 1959 │ │ │ │ │ │ (pid, sts) = self._try_wait(0) │ │ 1960 │ │ │ │ │ │ # Check the pid and loop as waitpid has been known to │ │ 1961 │ │ │ │ │ │ # return 0 even without WNOHANG in odd situations. │ │ 1962 │ │ │ │ │ │ # http://bugs.python.org/issue14396. 
│ │ │ │ /usr/lib/python3.10/subprocess.py:1917 in _try_wait │ │ │ │ 1914 │ │ def _try_wait(self, wait_flags): │ │ 1915 │ │ │ """All callers to this function MUST hold self._waitpid_lock.""" │ │ 1916 │ │ │ try: │ │ ❱ 1917 │ │ │ │ (pid, sts) = os.waitpid(self.pid, wait_flags) │ │ 1918 │ │ │ except ChildProcessError: │ │ 1919 │ │ │ │ # This happens if SIGCLD is set to be ignored or waiting │ │ 1920 │ │ │ │ # for child processes has otherwise been disabled for our │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ KeyboardInterrupt

During handling of the above exception, another exception occurred:

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /usr/local/bin/accelerate:8 in │ │ │ │ 5 from accelerate.commands.accelerate_cli import main │ │ 6 if name == 'main': │ │ 7 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 8 │ sys.exit(main()) │ │ 9 │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:1104 in launch_command │ │ │ │ 1101 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │ │ 1102 │ │ sagemaker_launcher(defaults, args) │ │ 1103 │ else: │ │ ❱ 1104 │ │ simple_launcher(args) │ │ 1105 │ │ 1106 │ │ 1107 def main(): │ │ │ │ /usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py:565 in simple_launcher │ │ │ │ 562 │ current_env["OMP_NUM_THREADS"] = str(args.num_cpu_threads_per_process) │ │ 563 │ │ │ 564 │ process = subprocess.Popen(cmd, env=current_env) │ │ ❱ 565 │ process.wait() │ │ 566 │ if process.returncode != 0: │ │ 567 │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 568 │ │ │ │ /usr/lib/python3.10/subprocess.py:1222 in wait │ │ │ │ 1219 │ │ │ │ sigint_timeout = self._sigint_wait_secs │ │ 1220 │ │ │ self._sigint_wait_secs = 0 # nothing else should wait. 
│ │ 1221 │ │ │ try: │ │ ❱ 1222 │ │ │ │ self._wait(timeout=sigint_timeout) │ │ 1223 │ │ │ except TimeoutExpired: │ │ 1224 │ │ │ │ pass │ │ 1225 │ │ │ raise # resume the KeyboardInterrupt │ │ │ │ /usr/lib/python3.10/subprocess.py:1953 in _wait │ │ │ │ 1950 │ │ │ │ │ if remaining <= 0: │ │ 1951 │ │ │ │ │ │ raise TimeoutExpired(self.args, timeout) │ │ 1952 │ │ │ │ │ delay = min(delay * 2, remaining, .05) │ │ ❱ 1953 │ │ │ │ │ time.sleep(delay) │ │ 1954 │ │ │ else: │ │ 1955 │ │ │ │ while self.returncode is None: │ │ 1956 │ │ │ │ │ with self._waitpid_lock: │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ KeyboardInterrupt ^C

DEX-1101 commented 8 months ago

Did you already try a fresh notebook?

ride5k commented 8 months ago

Same here, even with a fresh notebook: CUDA backend failed to initialize: Found CUDA version 12010, but JAX was built against version 12020, which is newer. The copy of CUDA that is installed must be at least as new as the version against which JAX was built. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)

DEX-1101 commented 8 months ago

Screenshot_1 Try adding this command: !wget https://github.com/DEX-1101/kohya-trainer/raw/main/requirements.txt -O /content/requirements.txt