sb-ai-lab / LightAutoML

Fast and customizable framework for automatic ML model creation (AutoML)
https://developers.sber.ru/portal/products/lightautoml
Apache License 2.0

Unable to Process TabularCVAutoML.fit_predict() #80

Open MissTiny opened 1 year ago

MissTiny commented 1 year ago

🐛 Bug
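
The report does not include the call that produced the traceback below. For context, here is a minimal sketch of the kind of invocation that routes an image-path column through ImageAutoFeatures/AutoCVWrap; the import path, column names, and the "path" role key are assumptions based on the LightAutoML documentation, not taken from this report.

import pandas as pd

from lightautoml.automl.presets.image_presets import TabularCVAutoML  # assumed module path
from lightautoml.tasks import Task

# Toy frame just to show the shape of the call: a target column plus a
# column of image file paths (hypothetical names).
train = pd.DataFrame(
    {
        "target": [0, 1, 0, 1],
        "img_path": ["imgs/0.jpg", "imgs/1.jpg", "imgs/2.jpg", "imgs/3.jpg"],
    }
)

automl = TabularCVAutoML(task=Task("binary"), timeout=600)

# Marking img_path with the (assumed) "path" role is what sends the data
# through the image feature pipeline seen in the traceback below.
oof_pred = automl.fit_predict(
    train,
    roles={"target": "target", "path": ["img_path"]},
    verbose=1,
)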

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\automl\presets\tabular_presets.py:549, in TabularAutoML.fit_predict(self, train_data, roles, train_features, cv_iter, valid_data, valid_features, log_file, verbose)
    546 if valid_data is not None:
    547     data, _ = read_data(valid_data, valid_features, self.cpu_limit, self.read_csv_params)
--> 549 oof_pred = super().fit_predict(train, roles=roles, cv_iter=cv_iter, valid_data=valid_data, verbose=verbose)
    551 return cast(NumpyDataset, oof_pred)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\automl\presets\base.py:205, in AutoMLPreset.fit_predict(self, train_data, roles, train_features, cv_iter, valid_data, valid_features, verbose)
    202 logger.info(f"- memory: {self.memory_limit} GB\n")
    204 self.timer.start()
--> 205 result = super().fit_predict(
    206     train_data,
    207     roles,
    208     train_features,
    209     cv_iter,
    210     valid_data,
    211     valid_features,
    212     verbose=verbose,
    213 )
    215 logger.info("\x1b[1mAutoml preset training completed in {:.2f} seconds\x1b[0m\n".format(self.timer.time_spent))
    216 logger.info(f"Model description:\n{self.create_model_str_desc()}\n")

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\automl\base.py:212, in AutoML.fit_predict(self, train_data, roles, train_features, cv_iter, valid_data, valid_features, verbose)
    206 logger.info(
    207     f"Layer \x1b[1m{leven_number}\x1b[0m train process start. Time left {self.timer.time_left:.2f} secs"
    208 )
    210 for k, ml_pipe in enumerate(level):
--> 212     pipe_pred = ml_pipe.fit_predict(train_valid)
    213     level_predictions.append(pipe_pred)
    214     pipes.append(ml_pipe)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\pipelines\ml\base.py:120, in MLPipeline.fit_predict(self, train_valid)
    117 train_valid = train_valid.apply_selector(self.pre_selection)
    119 # apply features pipeline
--> 120 train_valid = train_valid.apply_feature_pipeline(self.features_pipeline)
    122 # train and apply post selection
    123 train_valid = train_valid.apply_selector(self.post_selection)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\validation\base.py:79, in TrainValidIterator.apply_feature_pipeline(self, features_pipeline)
     69 """Apply features pipeline on train data.
     70 
     71 Args:
   (...)
     76 
     77 """
     78 train_valid = copy(self)
---> 79 train_valid.train = features_pipeline.fit_transform(train_valid.train)
     80 return train_valid

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\pipelines\features\base.py:117, in FeaturesPipeline.fit_transform(self, train)
    115 # TODO: Think about input/output features attributes
    116 self._input_features = train.features
--> 117 self._pipeline = self._merge_seq(train) if self.sequential else self._merge(train)
    119 return self._pipeline.fit_transform(train)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\pipelines\features\base.py:162, in FeaturesPipeline._merge(self, data)
    160 pipes = []
    161 for pipe in self.pipes:
--> 162     pipes.append(pipe(data))
    164 return UnionTransformer(pipes) if len(pipes) > 1 else pipes[-1]

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\pipelines\features\image_pipeline.py:103, in ImageAutoFeatures.create_pipeline(self, train)
     98 imgs = get_columns_by_role(train, "Path")
     99 if len(imgs) > 0:
    100     imgs_processing = SequentialTransformer(
    101         [
    102             ColumnsSelector(keys=imgs),
--> 103             AutoCVWrap(
    104                 self.embed_model,
    105                 self.weights_path,
    106                 self.cache_dir,
    107                 self.subs,
    108                 self.device,
    109                 self.n_jobs,
    110                 self.random_state,
    111                 self.is_advprop,
    112                 self.batch_size,
    113                 self.verbose,
    114             ),
    115             SequentialTransformer([FillInf(), FillnaMedian(), StandardScaler()]),
    116         ]
    117     )
    118     transformers_list.append(imgs_processing)
    120 union_all = UnionTransformer(transformers_list)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\transformers\image.py:197, in AutoCVWrap.__init__(self, model, weights_path, cache_dir, subs, device, n_jobs, random_state, is_advprop, batch_size, verbose)
    194 self.dicts = {}
    195 self.cache_dir = cache_dir
--> 197 self.transformer = DeepImageEmbedder(
    198     device,
    199     n_jobs,
    200     random_state,
    201     is_advprop,
    202     model,
    203     weights_path,
    204     batch_size,
    205     verbose,
    206 )
    207 self._emb_name = "DI_" + single_text_hash(self.embed_model)
    208 self.emb_size = self.transformer.model.feature_shape

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\image\image.py:300, in DeepImageEmbedder.__init__(self, device, n_jobs, random_state, is_advprop, model_name, weights_path, batch_size, verbose)
    297 self.verbose = verbose
    298 seed_everything(random_state)
--> 300 self.model = EffNetImageEmbedder(model_name, weights_path, self.is_advprop, self.device)

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\lightautoml\image\image.py:196, in EffNetImageEmbedder.__init__(self, model_name, weights_path, is_advprop, device)
    193 super(EffNetImageEmbedder, self).__init__()
    194 self.device = device
    195 self.model = (
--> 196     EfficientNet.from_pretrained(
    197         model_name,
    198         weights_path=weights_path,
    199         advprop=is_advprop,
    200         include_top=False,
    201     )
    202     .eval()
    203     .to(self.device)
    204 )
    205 self.feature_shape = self.get_shape()
    206 self.is_advprop = is_advprop

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\efficientnet_pytorch\model.py:378, in EfficientNet.from_pretrained(cls, model_name, weights_path, advprop, in_channels, num_classes, **override_params)
    351 """Create an efficientnet model according to name.
    352 
    353 Args:
   (...)
    375     A pretrained efficientnet model.
    376 """
    377 model = cls.from_name(model_name, num_classes=num_classes, **override_params)
--> 378 load_pretrained_weights(model, model_name, weights_path=weights_path,
    379                         load_fc=(num_classes == 1000), advprop=advprop)
    380 model._change_in_channels(in_channels)
    381 return model

File D:\anaconda3\envs\RecommenderSystems\lib\site-packages\efficientnet_pytorch\utils.py:613, in load_pretrained_weights(model, model_name, weights_path, load_fc, advprop, verbose)
    610     ret = model.load_state_dict(state_dict, strict=False)
    611     assert set(ret.missing_keys) == set(
    612         ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
--> 613 assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)
    615 if verbose:
    616     print('Loaded pretrained weights for {}'.format(model_name))

AssertionError: Missing keys when loading pretrained weights: ['_fc.weight', '_fc.bias']
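
One reading of this traceback (not a confirmed diagnosis): with include_top=False, efficientnet-pytorch builds the network without its final _fc classifier, while the pretrained checkpoint still contains _fc.weight and _fc.bias, so load_state_dict(strict=False) reports those keys as unexpected and load_pretrained_weights asserts on them. A minimal offline check of that premise, using "efficientnet-b0" purely as an example model name:

from efficientnet_pytorch import EfficientNet

# Build the architecture only (no weight download) without the classifier head.
headless = EfficientNet.from_name("efficientnet-b0", include_top=False)

# Expected: False -- the headless model has no _fc parameters, so the
# checkpoint's _fc.weight/_fc.bias can only come back as unexpected keys.
print(any(key.startswith("_fc.") for key in headless.state_dict()))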

Expected behavior

I noticed that if the EffNetImageEmbedder class in lightautoml.image called

EfficientNet.from_pretrained(
    model_name,
    weights_path=weights_path,
    advprop=is_advprop,
    include_top=True,
)

then I would be able to run the code. Please provide a way to configure include_top.
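
A minimal standalone check of the suggested change, outside LightAutoML (assumes the pretrained checkpoint can be downloaded and uses "efficientnet-b0" purely as an example model name):

import torch
from efficientnet_pytorch import EfficientNet

# Loading with the classification head included avoids the unexpected-keys assert.
model = EfficientNet.from_pretrained(
    "efficientnet-b0",
    advprop=False,
    include_top=True,
).eval()

# The convolutional features used for embeddings are still available.
with torch.no_grad():
    feats = model.extract_features(torch.zeros(1, 3, 224, 224))
print(feats.shape)  # torch.Size([1, 1280, 7, 7]) for efficientnet-b0 at 224x224

This only confirms that the weights load with include_top=True; whether LightAutoML's embedder then produces the intended embeddings would still need to be checked.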

DaniilAnd commented 10 months ago

Hello, has anyone solved this issue?

DaniilAnd commented 10 months ago

@dev-rinchin Hi! It works with efficientnet-pytorch==0.7.0.
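
For anyone hitting the same assertion, one way to apply this workaround is to pin the dependency (for example, pip install "efficientnet-pytorch==0.7.0") and then confirm at runtime that the pinned version is the one actually being imported. The snippet below assumes the package exposes __version__, as in its published releases.

import efficientnet_pytorch

# Expect "0.7.0" after pinning; a different value means another install
# (e.g. from a different environment) is shadowing the pin.
print(efficientnet_pytorch.__version__)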