Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 2954188) of binary: /home/yingjc/.conda/envs/FaceChain/bin/python
Traceback (most recent call last):
File "/home/yingjc/.conda/envs/FaceChain/bin/accelerate", line 8, in
sys.exit(main())
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/accelerate_cli.py", line 46, in main
args.func(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/launch.py", line 1073, in launch_command
multi_gpu_launcher(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/launch.py", line 718, in multi_gpu_launcher
distrib_run.run(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/run.py", line 785, in run
elastic_launch(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 134, in call
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 250, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
Traceback (most recent call last):
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/queueing.py", line 407, in call_prediction
output = await route_utils.call_process_api(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/route_utils.py", line 226, in call_process_api
output = await app.get_blocks().process_api(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/blocks.py", line 1550, in process_api
result = await self.call_function(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/blocks.py", line 1185, in call_function
prediction = await anyio.to_thread.run_sync(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/utils.py", line 661, in wrapper
response = f(args, **kwargs)
File "app.py", line 804, in run
train_lora_fn(base_model_path=base_model_path,
File "app.py", line 207, in train_lora_fn
raise gr.Error("训练失败 (Training failed)")
gradio.exceptions.Error: '训练失败 (Training failed)'
报错如下,各位大佬帮忙看看:
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
Traceback (most recent call last):
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 1224, in
main()
File "/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py", line 789, in main
dataset = load_dataset("imagefolder", data_dir=args.dataset_name)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2587, in load_dataset
builder_instance = load_dataset_builder(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 2259, in load_dataset_builder
dataset_module = dataset_module_factory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1793, in dataset_module_factory
return PackagedDatasetModuleFactory(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/load.py", line 1139, in get_module
else get_data_patterns(base_path, download_config=self.download_config)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/datasets/data_files.py", line 499, in get_data_patterns
raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None
datasets.data_files.EmptyDatasetError: The directory at /home/yingjc/code/FaceChain/worker_data/qw/training_data/ly261666/cv_portrait_model/person1_labeled doesn't contain any data files
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 2954188) of binary: /home/yingjc/.conda/envs/FaceChain/bin/python
Traceback (most recent call last):
File "/home/yingjc/.conda/envs/FaceChain/bin/accelerate", line 8, in
sys.exit(main())
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/accelerate_cli.py", line 46, in main
args.func(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/launch.py", line 1073, in launch_command
multi_gpu_launcher(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/accelerate/commands/launch.py", line 718, in multi_gpu_launcher
distrib_run.run(args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/run.py", line 785, in run
elastic_launch(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 134, in call
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 250, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
/home/yingjc/code/FaceChain/facechain/train_text_to_image_lora.py FAILED
Failures:
[1]:
time : 2024-05-20_15:50:39
host : kemove
rank : 1 (local_rank: 1)
exitcode : 1 (pid: 2954189)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[2]:
time : 2024-05-20_15:50:39
host : kemove
rank : 2 (local_rank: 2)
exitcode : 1 (pid: 2954190)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[3]:
time : 2024-05-20_15:50:39
host : kemove
rank : 3 (local_rank: 3)
exitcode : 1 (pid: 2954192)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[4]:
time : 2024-05-20_15:50:39
host : kemove
rank : 4 (local_rank: 4)
exitcode : 1 (pid: 2954202)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
Root Cause (first observed failure):
[0]:
time : 2024-05-20_15:50:39
host : kemove
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 2954188)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
Traceback (most recent call last):
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/queueing.py", line 407, in call_prediction
output = await route_utils.call_process_api(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/route_utils.py", line 226, in call_process_api
output = await app.get_blocks().process_api(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/blocks.py", line 1550, in process_api
result = await self.call_function(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/blocks.py", line 1185, in call_function
prediction = await anyio.to_thread.run_sync(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, args)
File "/home/yingjc/.conda/envs/FaceChain/lib/python3.8/site-packages/gradio/utils.py", line 661, in wrapper
response = f(args, **kwargs)
File "app.py", line 804, in run
train_lora_fn(base_model_path=base_model_path,
File "app.py", line 207, in train_lora_fn
raise gr.Error("训练失败 (Training failed)")
gradio.exceptions.Error: '训练失败 (Training failed)'