Closed pjbull closed 2 years ago
If you train a model with zamba and then try to use the checkpoint file to do inference without specifying a video loader config, it will fail with an error like the one in the logs below.
zamba
This happens because we can't figure out what defaults to assume for the trained model, since we explicitly set model_name to None: https://github.com/drivendataorg/zamba/blob/e30c2aeba1c7b1e0a2a1a43ea85c05eb72d207c8/zamba/models/config.py#L164-L168
model_name
None
And then we use model_name to get the default config: https://github.com/drivendataorg/zamba/blob/e30c2aeba1c7b1e0a2a1a43ea85c05eb72d207c8/zamba/models/config.py#L812-L816
2022-08-24 22:25:36.022 | INFO | zamba.models.config:validate_model_name_and_checkpoint:166 - Using checkpoint file: /storage/mlmodels/3/4d7c5e6e-38e7-44ad-870c-8c85b20e01e4.ckpt. 2022-08-24 22:25:36.035 | INFO | zamba.models.config:check_files_exist_and_load:104 - Checking all 9 filepaths exist. Can take up to a minute for every couple thousand files. 2022-08-24 22:25:36.109 | INFO | zamba.models.config:check_files_exist_and_load:129 - Checking that all videos can be loaded. If you're very confident all your videos can be loaded, you can skip this with `skip_load_validation`, but it's not recommended. 0%| | 0/9 [00:00<?, ?it/s] 11%|█ | 1/9 [00:00<00:00, 8.52it/s] 22%|██▏ | 2/9 [00:00<00:01, 4.89it/s] 33%|███▎ | 3/9 [00:00<00:01, 4.75it/s] 44%|████▍ | 4/9 [00:00<00:00, 5.27it/s] 56%|█████▌ | 5/9 [00:00<00:00, 6.06it/s] 67%|██████▋ | 6/9 [00:01<00:00, 6.52it/s] 78%|███████▊ | 7/9 [00:01<00:00, 6.85it/s] 89%|████████▉ | 8/9 [00:01<00:00, 5.96it/s] 100%|██████████| 9/9 [00:01<00:00, 5.75it/s] 100%|██████████| 9/9 [00:01<00:00, 5.83it/s] 2022-08-24 22:25:37.657 | INFO | zamba.models.config:get_default_video_loader_config:791 - No video loader config specified. Using default for None. 
╭───────────────────── Traceback (most recent call last) ──────────────────────╮ │ /usr/local/lib/python3.8/dist-packages/zamba/cli.py:325 in predict │ │ │ │ 322 │ │ │ 323 │ try: │ │ 324 │ │ manager = ModelManager( │ │ ❱ 325 │ │ │ ModelConfig( │ │ 326 │ │ │ │ video_loader_config=video_loader_config, │ │ 327 │ │ │ │ predict_config=PredictConfig(**predict_dict), │ │ 328 │ │ │ ) │ │ │ │ ╭───────────────────────────────── locals ─────────────────────────────────╮ │ │ │ batch_size = None │ │ │ │ checkpoint = None │ │ │ │ config = PosixPath('/storage/inferencejobs/e41f5d7a-b21… │ │ │ │ config_dict = { │ │ │ │ │ 'predict_config': { │ │ │ │ │ │ 'model_cache_dir': │ │ │ │ '/root/model_weight_cache', │ │ │ │ │ │ 'data_dir': '/storage', │ │ │ │ │ │ 'filepaths': │ │ │ │ '/storage/inferencejobs/e41f5d7a-b21a-4dde-985… │ │ │ │ │ │ 'save_dir': │ │ │ │ '/storage/inferencejobs/e41f5d7a-b21a-4dde-985… │ │ │ │ │ │ 'checkpoint': │ │ │ │ '/storage/mlmodels/3/4d7c5e6e-38e7-44ad-870c-8… │ │ │ │ │ │ 'num_workers': 2, │ │ │ │ │ │ 'batch_size': 1, │ │ │ │ │ │ 'weight_download_region': 'eu' │ │ │ │ │ } │ │ │ │ } │ │ │ │ config_file = PosixPath('/storage/inferencejobs/e41f5d7a-b21… │ │ │ │ data_dir = None │ │ │ │ dry_run = None │ │ │ │ f = <_io.TextIOWrapper │ │ │ │ name='/storage/inferencejobs/e41f5d7a-b21a-4dd… │ │ │ │ mode='r' encoding='UTF-8'> │ │ │ │ filepaths = None │ │ │ │ gpus = None │ │ │ │ model = <ModelEnum.time_distributed: │ │ │ │ 'time_distributed'> │ │ │ │ num_workers = None │ │ │ │ output_class_names = None │ │ │ │ overwrite = None │ │ │ │ predict_dict = { │ │ │ │ │ 'model_cache_dir': │ │ │ │ '/root/model_weight_cache', │ │ │ │ │ 'data_dir': '/storage', │ │ │ │ │ 'filepaths': │ │ │ │ '/storage/inferencejobs/e41f5d7a-b21a-4dde-985… │ │ │ │ │ 'save_dir': │ │ │ │ '/storage/inferencejobs/e41f5d7a-b21a-4dde-985… │ │ │ │ │ 'checkpoint': │ │ │ │ '/storage/mlmodels/3/4d7c5e6e-38e7-44ad-870c-8… │ │ │ │ │ 'num_workers': 2, │ │ │ │ │ 'batch_size': 1, │ │ │ │ │ 'weight_download_region': 
'eu' │ │ │ │ } │ │ │ │ proba_threshold = None │ │ │ │ save = None │ │ │ │ save_dir = None │ │ │ │ skip_load_validation = None │ │ │ │ video_loader_config = None │ │ │ │ weight_download_region = None │ │ │ │ yes = True │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ │ │ │ │ /root/pydantic/main.py:339 in pydantic.main.BaseModel.__init__ │ │ │ │ [Errno 2] No such file or directory: '/root/pydantic/main.py' │ │ │ │ /root/pydantic/main.py:1082 in pydantic.main.validate_model │ │ │ │ [Errno 2] No such file or directory: '/root/pydantic/main.py' │ │ │ │ /usr/local/lib/python3.8/dist-packages/zamba/models/config.py:794 in │ │ get_default_video_loader_config │ │ │ │ 791 │ │ │ logger.info(f"No video loader config specified. Using defa │ │ 792 │ │ │ │ │ 793 │ │ │ config_file = MODELS_DIRECTORY / f"{model_name}/config.yam │ │ ❱ 794 │ │ │ with config_file.open() as f: │ │ 795 │ │ │ │ config_dict = yaml.safe_load(f) │ │ 796 │ │ │ │ │ 797 │ │ │ values["video_loader_config"] = VideoLoaderConfig(**config │ │ │ │ ╭───────────────────────────────── locals ─────────────────────────────────╮ │ │ │ cls = <class 'zamba.models.config.ModelConfig'> │ │ │ │ config_file = PosixPath('/usr/local/lib/python3.8/dist-packages/zamba/m… │ │ │ │ model_name = None │ │ │ │ values = { │ │ │ │ │ 'video_loader_config': None, │ │ │ │ │ 'train_config': None, │ │ │ │ │ 'predict_config': │ │ │ │ PredictConfig(data_dir=PosixPath('/storage'), filepaths= │ │ │ │ filepath │ │ │ │ 0 /storage/video/068b049d-abfe-4291-8b27-d73cefc... │ │ │ │ 1 /storage/video/1beca6f0-835f-4ab4-9619-e4752d4... │ │ │ │ 2 /storage/video/388d5384-929a-47e2-adaf-e9d0a9e... │ │ │ │ 3 /storage/video/430416b0-9d94-4902-a680-49d28ab... │ │ │ │ 4 /storage/video/636e7abe-120b-4682-94f9-9d63e7d... │ │ │ │ 5 /storage/video/6dbbb5f9-fa20-458f-9619-66feb4a... │ │ │ │ 6 /storage/video/774b01be-8a27-484f-8f91-d01f668... │ │ │ │ 7 /storage/video/7a732117-50d6-4d12-bbb9-83d040a... 
│ │ │ │ 8 /storage/video/fa4aed2d-f012-455d-881b-12b5564..., │ │ │ │ checkpoint=PosixPath('/storage/mlmodels/3/4d7c5e6e-38e7-4… │ │ │ │ model_name=None, gpus=1, num_workers=2, batch_size=1, │ │ │ │ save=True, │ │ │ │ save_dir=PosixPath('/storage/inferencejobs/e41f5d7a-b21a-… │ │ │ │ overwrite=False, dry_run=False, proba_threshold=None, │ │ │ │ output_class_names=False, weight_download_region='eu', │ │ │ │ skip_load_validation=False, │ │ │ │ model_cache_dir=PosixPath('/root/model_weight_cache')) │ │ │ │ } │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ │ │ │ │ /usr/lib/python3.8/pathlib.py:1222 in open │ │ │ │ 1219 │ │ """ │ │ 1220 │ │ if self._closed: │ │ 1221 │ │ │ self._raise_closed() │ │ ❱ 1222 │ │ return io.open(self, mode, buffering, encoding, errors, newli │ │ 1223 │ │ │ │ │ opener=self._opener) │ │ 1224 │ │ │ 1225 │ def read_bytes(self): │ │ │ │ ╭───────────────────────────────── locals ─────────────────────────────────╮ │ │ │ buffering = -1 │ │ │ │ encoding = None │ │ │ │ errors = None │ │ │ │ mode = 'r' │ │ │ │ newline = None │ │ │ │ self = PosixPath('/usr/local/lib/python3.8/dist-packages/zamba/mod… │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ │ │ │ │ /usr/lib/python3.8/pathlib.py:1078 in _opener │ │ │ │ 1075 │ │ │ 1076 │ def _opener(self, name, flags, mode=0o666): │ │ 1077 │ │ # A stub for the opener argument to built-in open() │ │ ❱ 1078 │ │ return self._accessor.open(self, flags, mode) │ │ 1079 │ │ │ 1080 │ def _raw_open(self, flags, mode=0o777): │ │ 1081 │ │ """ │ │ │ │ ╭───────────────────────────────── locals ─────────────────────────────────╮ │ │ │ flags = 524288 │ │ │ │ mode = 438 │ │ │ │ name = '/usr/local/lib/python3.8/dist-packages/zamba/models/official_m… │ │ │ │ self = PosixPath('/usr/local/lib/python3.8/dist-packages/zamba/models/… │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ │ 
╰──────────────────────────────────────────────────────────────────────────────╯ FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/lib/python3.8/dist-packages/zamba/models/official_models/None/config.yaml'
If you train a model with
zamba
and then try to use the checkpoint file to do inference without specifying a video loader config, it will fail with an error like the one in the logs below. This happens because we can't figure out what defaults to assume for the trained model, since we explicitly set
model_name
to None
: https://github.com/drivendataorg/zamba/blob/e30c2aeba1c7b1e0a2a1a43ea85c05eb72d207c8/zamba/models/config.py#L164-L168 And then we use
model_name
to get the default config: https://github.com/drivendataorg/zamba/blob/e30c2aeba1c7b1e0a2a1a43ea85c05eb72d207c8/zamba/models/config.py#L812-L816