zcablii / SARDet_100K

Official implementation of MSFA and release of the SARDet_100K dataset for Large-Scale Synthetic Aperture Radar (SAR) Object Detection

inference for tif files #16

Closed paster489 closed 4 months ago

paster489 commented 4 months ago

Hi all,

I tried to run inference on large TIFF files with a resolution of 28780x48283 pixels. The Python code is:

            from mmdet.apis import DetInferencer
            import msfa  # registers the MSFA modules with mmdet
            import tifffile as tifi

            def main():

                config_file = './local_configs/SARDet/other_backbones/fg_frcnn_dota_pretrain_sar_r101_wavelet.py'
                checkpoint_file = './mmdetection/SAR_Yuxuan_weights/fg_frcnn_dota_pretrain_sar_r101_wavelet/best_coco_bbox_mAP_epoch_12.pth'

                inferencer = DetInferencer(model=config_file, weights=checkpoint_file, device='cpu')

                image_path_tif = "/home/paster/Documents/SAR_inference/Images/Umbra_SAR_imagaes_download/Acapulco Flooding_01abc40f-a2ca-4c6d-a77a-3a48275db352_GEC.tif"
                image_path_jpg = "/home/paster/Documents/SAR_inference/Images/val/0000004.jpg"

                # Inference on a JPG from the validation set works as expected
                infer = inferencer(image_path_jpg, out_dir='./output_umbra_sar_r101')
                for pred in infer['predictions']:
                    print(f"labels={pred['labels']} scores={pred['scores']}")

                # Inference on the large TIFF file
                img = tifi.imread(image_path_tif)
                infer_tif = inferencer(img, out_dir='./output_umbra_sar_r101')
                for pred in infer_tif['predictions']:
                    print(f"labels={pred['labels']} scores={pred['scores']}")

            if __name__ == '__main__':
                main()

After running the code, there was an issue with the TIFF file, which has _batch_input.shape = torch.Size([1, 800, 800]), whereas your validation JPG files have _batch_input.shape = torch.Size([3, 800, 800]). I then changed the forward function in the data_preprocess.py file:

              def forward(self, data: dict, training: bool = False) -> Union[dict, list]:
                      """Performs normalization, padding, and bgr2rgb conversion based on
                      ``BaseDataPreprocessor``.

                      Args:
                          data (dict): Data sampled from the dataset. If the collate
                              function of DataLoader is :obj:`pseudo_collate`, data will be a
                              list of dict. If the collate function is :obj:`default_collate`,
                              data will be a tuple with batch input tensor and list of data
                              samples.
                          training (bool): Whether to enable training time augmentation. If
                              subclasses override this method, they can perform different
                              preprocessing strategies for training and testing based on the
                              value of ``training``.

                      Returns:
                          dict or list: Data in the same format as the model input.
                      """
                      data = self.cast_data(data)
                      _batch_inputs = data['inputs']

                      # Process data with `pseudo_collate`.
                      if is_seq_of(_batch_inputs, torch.Tensor):
                          batch_inputs = []
                          for _batch_input in _batch_inputs:
                              print('_batch_input.shape = ', _batch_input.shape)  # For debugging

                              # Channel transformation
                              if self._channel_conversion:
                                  # Check if input tensor has shape [1, 800, 800]
                                  if _batch_input.shape[0] == 1:
                                      # Repeat the single channel to create a 3-channel image
                                      _batch_input = _batch_input.repeat(3, 1, 1)
                                  # Convert to RGB from BGR
                                  _batch_input = _batch_input[[2, 1, 0], ...]

                              # Convert to float after channel conversion to ensure efficiency
                              _batch_input = _batch_input.float()

                              # Normalization
                              if self._enable_normalize:
                                  _batch_input = (_batch_input - self.mean) / self.std

                              batch_inputs.append(_batch_input)

                          # Pad and stack tensors
                          batch_inputs = stack_batch(batch_inputs, self.pad_size_divisor, self.pad_value)

                      # Process data with `default_collate`
                      elif isinstance(_batch_inputs, torch.Tensor):
                          assert _batch_inputs.dim() == 4, (
                              'The input of `ImgDataPreprocessor` should be a NCHW tensor '
                              'or a list of tensor, but got a tensor with shape: '
                              f'{_batch_inputs.shape}')

                          if self._channel_conversion:
                              # Check if input tensor has shape [1, 800, 800]
                              if _batch_inputs.shape[1] == 1:
                                  # Repeat the single channel to create a 3-channel image
                                  _batch_inputs = _batch_inputs.repeat(1, 3, 1, 1)
                              # Convert to RGB from BGR
                              _batch_inputs = _batch_inputs[:, [2, 1, 0], ...]

                          # Convert to float after channel conversion to ensure efficiency
                          _batch_inputs = _batch_inputs.float()

                          # Normalize
                          if self._enable_normalize:
                              _batch_inputs = (_batch_inputs - self.mean) / self.std

                          h, w = _batch_inputs.shape[2:]
                          target_h = math.ceil(h / self.pad_size_divisor) * self.pad_size_divisor
                          target_w = math.ceil(w / self.pad_size_divisor) * self.pad_size_divisor
                          pad_h = target_h - h
                          pad_w = target_w - w
                          batch_inputs = F.pad(_batch_inputs, (0, pad_w, 0, pad_h), 'constant', self.pad_value)
                      else:
                          raise TypeError('Output of `cast_data` should be a dict of '
                                          'list/tuple with inputs and data_samples, '
                                          f'but got {type(data)}: {data}')

                      data['inputs'] = batch_inputs
                      data.setdefault('data_samples', None)
                      return data

the inference was still killed for the TIFF image:

                      _batch_input.shape =  torch.Size([1, 800, 800])
                      Inference ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━   Killed                

Do you have an idea how to run your models' inference on large TIFF files that are outside the scope of your validation set? What image preprocessing do I need to do?

Thanks for the help.

zcablii commented 4 months ago

You can use image_demo.py to run inference on a single image. However, I have a few reminders for you:

  1. Please ensure that the pixel values in your TIFF file range from 0 to 255. If they do not fall within this range, you may need to preprocess your image. Ideally, consider converting it to JPG or PNG format to ensure compatibility (see the first sketch after this list).
  2. If your image is excessively large, directly resizing it to 800x800 pixels may make the targets invisible. To mitigate this, consider first cropping the image into patches of approximately 800x800 pixels, then merging the detection results from each patch (see the tiling sketch after this list).
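
A minimal sketch of the first reminder, assuming the source TIFF is a single-channel array with an arbitrary dynamic range. The function name sar_tif_to_png and the percentile clipping values are illustrative assumptions, not part of this repo:

        import numpy as np
        import tifffile as tifi
        from PIL import Image

        def sar_tif_to_png(tif_path, png_path, low=1.0, high=99.0):
            """Clip a raw SAR TIFF to a percentile range and save it as an 8-bit RGB PNG."""
            img = tifi.imread(tif_path).astype(np.float32)

            # Clip extreme values; raw SAR amplitudes often have a long-tailed histogram
            lo, hi = np.percentile(img, [low, high])
            img = np.clip(img, lo, hi)

            # Rescale to the 0-255 range expected by the detector
            img = (img - lo) / max(hi - lo, 1e-6) * 255.0
            img = img.astype(np.uint8)

            # Replicate the single SAR channel into 3 channels
            if img.ndim == 2:
                img = np.stack([img] * 3, axis=-1)

            Image.fromarray(img).save(png_path)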
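
And a minimal sketch of the cropping-and-merging idea from the second reminder, assuming the image has already been converted to a 3-channel 0-255 array as above. The tile size, overlap, and IoU threshold are illustrative assumptions, and the merging uses torchvision's class-agnostic NMS rather than any utility from this repo:

        import torch
        from torchvision.ops import nms

        def tiled_inference(inferencer, img, tile=800, overlap=200, iou_thr=0.5):
            """Run a DetInferencer on overlapping tiles and merge the boxes with NMS."""
            h, w = img.shape[:2]
            step = tile - overlap
            boxes, scores, labels = [], [], []

            for y in range(0, max(h - overlap, 1), step):
                for x in range(0, max(w - overlap, 1), step):
                    patch = img[y:y + tile, x:x + tile]
                    preds = inferencer(patch)['predictions'][0]
                    for bbox, score, label in zip(preds['bboxes'], preds['scores'], preds['labels']):
                        x1, y1, x2, y2 = bbox
                        # Shift tile-local boxes back to global image coordinates
                        boxes.append([x1 + x, y1 + y, x2 + x, y2 + y])
                        scores.append(score)
                        labels.append(label)

            if not boxes:
                return [], [], []

            boxes_t = torch.tensor(boxes, dtype=torch.float32)
            scores_t = torch.tensor(scores)
            # Class-agnostic NMS to deduplicate detections in the overlap regions
            keep = nms(boxes_t, scores_t, iou_thr)
            return boxes_t[keep].tolist(), scores_t[keep].tolist(), [labels[int(i)] for i in keep]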