Closed Bolvvv closed 11 months ago
same error
I am also having the same problem. Could you provide a more detailed guide?
same error
@yueluhhxx This is a module that we use for loading images from a cluster. It's an internal module of SenseTime, so you cannot install it. Please modify the image loading process yourself if that is convenient for you.
I apologize for any confusion caused earlier.
As mentioned by @MacavityT, the petrel_client
module is an internal component of Sensetime that is utilized for loading images from a distributed storage system. When attempting to load a file from an S3 path like balabala:s3://
, it triggers the initialization of petrel_client
, resulting in the error you encountered. To work with a dataset, you need to modify the image_folder
field in the configuration file.
To illustrate, let's take the POPE test set as an example. You can update the image_folder
field in the config/_base_/dataset/DEFAULT_TEST_POPE_VARIANT.py
file as follows:
Before modification:
POPE_TEST_COMMON_CFG = dict(
type='POPEVQADataset',
image_folder=r'openmmlab1424:s3://openmmlab/datasets/detection/coco/val2014',
)
After modification:
POPE_TEST_COMMON_CFG = dict(
type='POPEVQADataset',
image_folder=r'path/to/coco/val2014/on/your/computer',
)
I hope this clarification helps. If you have any further questions or need additional assistance, please let me know.
Hello, I installed Shikra according to the README and downloaded the dataset. I tried to perform inference but encountered an error during runtime. The error is "ModuleNotFoundError: No module named 'petrel_client'".
My Python version is 3.9.2 and my torch version is 2.0.1. The following is the error message:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /root/shikra/mllm/pipeline/finetune.py:141 in │
│ │
│ 138 │
│ 139 │
│ 140 if name == "main": │
│ ❱ 141 │ main() │
│ 142 │
│ │
│ /root/shikra/mllm/pipeline/finetune.py:127 in main │
│ │
│ 124 │ │ │ prefix = f"multitest{k}" │
│ 125 │ │ │ │
│ 126 │ │ │ trainer.compute_metrics = _compute_metrics │
│ ❱ 127 │ │ │ _pred_results = trainer.predict(_ds, metric_key_prefix=_prefix, *gen_kwargs │
│ 128 │ │ │ trainer.log_metrics(_prefix, _pred_results.metrics) # noqa │
│ 129 │ │ │ trainer.save_metrics(_prefix, _pred_results.metrics) # noqa │
│ 130 │ │ │ trainer.save_prediction(_pred_results, file_key_prefix=_prefix) │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/transformers/trainer_seq2seq.py:135 in predict │
│ │
│ 132 │ │ ) │
│ 133 │ │ self._gen_kwargs = gen_kwargs │
│ 134 │ │ │
│ ❱ 135 │ │ return super().predict(test_dataset, ignore_keys=ignore_keys, metric_key_prefix= │
│ 136 │ │
│ 137 │ def prediction_step( │
│ 138 │ │ self, │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/transformers/trainer.py:3020 in predict │
│ │
│ 3017 │ │ start_time = time.time() │
│ 3018 │ │ │
│ 3019 │ │ eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else se │
│ ❱ 3020 │ │ output = eval_loop( │
│ 3021 │ │ │ test_dataloader, description="Prediction", ignore_keys=ignore_keys, metric_k │
│ 3022 │ │ ) │
│ 3023 │ │ total_batch_size = self.args.eval_batch_size self.args.world_size │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/transformers/trainer.py:3115 in evaluation_loop │
│ │
│ 3112 │ │ │
│ 3113 │ │ observed_num_examples = 0 │
│ 3114 │ │ # Main evaluation loop │
│ ❱ 3115 │ │ for step, inputs in enumerate(dataloader): │
│ 3116 │ │ │ # Update the observed num examples │
│ 3117 │ │ │ observed_batch_size = find_batch_size(inputs) │
│ 3118 │ │ │ if observed_batch_size is not None: │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/torch/utils/data/dataloader.py:633 in next │
│ │
│ 630 │ │ │ if self._sampler_iter is None: │
│ 631 │ │ │ │ # TODO(https://github.com/pytorch/pytorch/issues/76750) │
│ 632 │ │ │ │ self._reset() # type: ignore[call-arg] │
│ ❱ 633 │ │ │ data = self._next_data() │
│ 634 │ │ │ self._num_yielded += 1 │
│ 635 │ │ │ if self._dataset_kind == _DatasetKind.Iterable and \ │
│ 636 │ │ │ │ │ self._IterableDataset_len_called is not None and \ │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/torch/utils/data/dataloader.py:677 in _next_data │
│ │
│ 674 │ │
│ 675 │ def _next_data(self): │
│ 676 │ │ index = self._next_index() # may raise StopIteration │
│ ❱ 677 │ │ data = self._dataset_fetcher.fetch(index) # may raise StopIteration │
│ 678 │ │ if self._pin_memory: │
│ 679 │ │ │ data = _utils.pin_memory.pin_memory(data, self._pin_memory_device) │
│ 680 │ │ return data │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py:51 in fetch │
│ │
│ 48 │ │ │ if hasattr(self.dataset, "getitems") and self.dataset.getitems: │
│ 49 │ │ │ │ data = self.dataset.getitems(possibly_batched_index) │
│ 50 │ │ │ else: │
│ ❱ 51 │ │ │ │ data = [self.dataset[idx] for idx in possibly_batched_index] │
│ 52 │ │ else: │
│ 53 │ │ │ data = self.dataset[possibly_batched_index] │
│ 54 │ │ return self.collate_fn(data) │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py:51 in │
│ │
│ 48 │ │ │ if hasattr(self.dataset, "getitems") and self.dataset.getitems: │
│ 49 │ │ │ │ data = self.dataset.getitems(possibly_batched_index) │
│ 50 │ │ │ else: │
│ ❱ 51 │ │ │ │ data = [self.dataset[idx] for idx in possibly_batched_index] │
│ 52 │ │ else: │
│ 53 │ │ │ data = self.dataset[possibly_batched_index] │
│ 54 │ │ return self.collate_fn(data) │
│ │
│ /root/shikra/mllm/dataset/single_image_convsation.py:42 in getitem │
│ │
│ 39 │ │
│ 40 │ def getitem(self, index, debug_mode=False) -> Dict[str, Any]: │
│ 41 │ │ # getitem │
│ ❱ 42 │ │ item = self.get_raw_item(index) │
│ 43 │ │ image: Image.Image = item.get('image', None) │
│ 44 │ │ target: Dict[str, Any] = item.get('target', None) │
│ 45 │ │ raw_conv: List[Dict[str, Any]] = item['conversations'] │
│ │
│ /root/shikra/mllm/dataset/single_image_convsation.py:267 in get_raw_item │
│ │
│ 264 │ │
│ 265 │ def get_raw_item(self, index) -> Dict[str, Any]: │
│ 266 │ │ self.initialize_if_needed() │
│ ❱ 267 │ │ return self.dataset[index] │
│ 268 │ │
│ 269 │ def repr(self) -> str: │
│ 270 │ │ head = "Dataset " + self.class.name │
│ │
│ /root/shikra/mllm/dataset/single_image_dataset/pope.py:16 in getitem │
│ │
│ 13 │ │
│ 14 │ def getitem(self, index): │
│ 15 │ │ item = self.get_raw_item(index) │
│ ❱ 16 │ │ image = self.get_image(image_path=item['image']) │
│ 17 │ │ │
│ 18 │ │ question = item['text'] │
│ 19 │ │ final_question = self.get_template().replace(QUESTION_PLACEHOLDER, question) │
│ │
│ /root/shikra/mllm/dataset/utils/mixin.py:72 in get_image │
│ │
│ 69 │ def get_image(self, image_path): │
│ 70 │ │ if self.image_folder is not None: │
│ 71 │ │ │ image_path = os.path.join(self.image_folder, image_path) │
│ ❱ 72 │ │ image = read_img_general(image_path) │
│ 73 │ │ return image │
│ 74 │ │
│ 75 │ def get_template(self): │
│ │
│ /root/shikra/mllm/dataset/utils/io.py:20 in read_img_general │
│ │
│ 17 │
│ 18 def read_img_general(img_path): │
│ 19 │ if "s3://" in img_path: │
│ ❱ 20 │ │ cv_img = read_img_ceph(img_path) │
│ 21 │ │ # noinspection PyUnresolvedReferences │
│ 22 │ │ return Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)) │
│ 23 │ else: │
│ │
│ /root/shikra/mllm/dataset/utils/io.py:31 in read_img_ceph │
│ │
│ 28 │
│ 29 │
│ 30 def read_img_ceph(img_path): │
│ ❱ 31 │ init_ceph_client_if_needed() │
│ 32 │ img_bytes = client.get(img_path) │
│ 33 │ assert img_bytes is not None, f"Please check image at {img_path}" │
│ 34 │ img_mem_view = memoryview(img_bytes) │
│ │
│ /root/shikra/mllm/dataset/utils/io.py:46 in init_ceph_client_if_needed │
│ │
│ 43 │ if client is None: │
│ 44 │ │ logger.info(f"initializing ceph client ...") │
│ 45 │ │ st = time.time() │
│ ❱ 46 │ │ from petrel_client.client import Client # noqa │
│ 47 │ │ client = Client(enable_mc=True) │
│ 48 │ │ ed = time.time() │
│ 49 │ │ logger.info(f"initialize client cost {ed - st:.2f} s") │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ModuleNotFoundError: No module named 'petrel_client'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /root/shikra/venv/bin/accelerate:8 in │
│ │
│ 5 from accelerate.commands.accelerate_cli import main │
│ 6 if name == 'main': │
│ 7 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │
│ ❱ 8 │ sys.exit(main()) │
│ 9 │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/accelerate/commands/accelerate_cli.py:45 in main │
│ │
│ 42 │ │ exit(1) │
│ 43 │ │
│ 44 │ # Run │
│ ❱ 45 │ args.func(args) │
│ 46 │
│ 47 │
│ 48 if name == "main": │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/accelerate/commands/launch.py:941 in │
│ launch_command │
│ │
│ 938 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │
│ 939 │ │ sagemaker_launcher(defaults, args) │
│ 940 │ else: │
│ ❱ 941 │ │ simple_launcher(args) │
│ 942 │
│ 943 │
│ 944 def main(): │
│ │
│ /root/shikra/venv/lib/python3.9/site-packages/accelerate/commands/launch.py:603 in │
│ simple_launcher │
│ │
│ 600 │ process.wait() │
│ 601 │ if process.returncode != 0: │
│ 602 │ │ if not args.quiet: │
│ ❱ 603 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │
│ 604 │ │ else: │
│ 605 │ │ │ sys.exit(1) │
│ 606 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['/root/shikra/venv/bin/python', 'mllm/pipeline/finetune.py', 'config/shikra_eval_multi_pope.py',
'--cfg-options', 'model_args.model_name_or_path=/root/shikra/shikra-7b', '--per_device_eval_batch_size', '1']' returned
non-zero exit status 1.