pyutils / line_profiler

Line-by-line profiling for Python
Other
2.57k stars 118 forks source link

profiling not matching different scopes #211

Open malfonsoarquimea opened 1 year ago

malfonsoarquimea commented 1 year ago

I am profiling code in which function_a calls function_b exactly once. The time reported for the function_b call line inside function_a is the real time, as it matches the time I am measuring, but the total time reported when profiling function_b itself is much smaller (35.9 s for the call line versus a total of 4.20073 s for function_b in the output below). Here is the profiling result, where function_a is the `__init__()` function and function_b is `preload_data()`:

Timer unit: 1 s

Total time: 36.3447 s
File: /workspaces/DyNeRFactor/datamodule/dataset_dynerf.py
Function: __init__ at line 17

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    17                                               @profile
    18                                               def __init__(
    19                                                   self,
    20                                                   h5_buffers_path,
    21                                                   h5_dataset_path,
    22                                                   use_light_visibility_buffer=False,
    23                                                   num_frames=1,
    24                                                   split="train",
    25                                                   batch_size=1,
    26                                                   num_val_cameras=1,
    27                                                   num_val_frames=1,
    28                                                   depth_percentile_threshold=100,
    29                                               ):
    30         2          0.0      0.0      0.0          self.h5_buffers_path = h5_buffers_path
    31         2          0.0      0.0      0.0          self.h5_dataset_path = h5_dataset_path
    32         2          0.0      0.0      0.0          self.use_light_visibility_buffer = use_light_visibility_buffer
    33         2          0.0      0.0      0.0          self.num_frames = num_frames
    34         2          0.0      0.0      0.0          self.batch_size = batch_size
    35         2          0.0      0.0      0.0          self.split = split
    36         2          0.0      0.0      0.0          self.num_val_cameras = num_val_cameras
    37         2          0.0      0.0      0.0          self.num_val_frames = num_val_frames
    38                                                   # open both h5 files
    39         2          0.0      0.0      0.0          self.h5_buffers = h5py.File(self.h5_buffers_path, "r")
    40         2          0.0      0.0      0.0          self.h5_dataset = h5py.File(self.h5_dataset_path, "r")
    41                                                   # check that light visibility buffer is present if needed
    42         2          0.0      0.0      0.0          assert not (
    43         2          0.0      0.0      0.0              self.use_light_visibility_buffer
    44                                                       and (
    45         2          0.0      0.0      0.0                  len(self.h5_buffers["camera0"]["light_visibility"]) == 0
    46         2          0.0      0.0      0.0                  or "light_visibility" not in self.h5_buffers["camera0"].keys()
    47                                                       )
    48                                                   ), "light visibility buffer not present in h5 file and use_light_visibility_buffer is True"
    49                                                   # check number of frames makes sense
    50         2          0.0      0.0      0.0          assert (
    51         2          0.0      0.0      0.0              self.num_frames <= self.h5_dataset["dataset"].attrs["n_frames"]
    52                                                   ), "num_frames is larger than the number of frames in the dataset"
    53         2          0.0      0.0      0.0          assert self.num_frames > 0, "num_frames must be larger than 0"
    54         2          0.0      0.0      0.0          assert self.h5_dataset["dataset"].attrs["n_frames"] == len(
    55         2          0.0      0.0      0.0              self.h5_dataset["dataset"]["camera0"]["rgbs"]
    56                                                   ), "attr num_frames in the dataset does not match the actual number of frames in the dataset"
    57                                                   # set n_cameras
    58         2          0.0      0.0      0.0          self.n_cameras = self.h5_dataset["dataset"].attrs["n_cameras"]
    59         2          0.0      0.0      0.0          self.img_wh = self.h5_dataset["dataset"].attrs["img_wh"]
    60         2          0.0      0.0      0.0          self.train_cameras = list(range(self.num_val_cameras, self.n_cameras))
    61         2          0.0      0.0      0.0          self.val_cameras = list(range(self.num_val_cameras))
    62         2          0.0      0.0      0.0          self.depth_percentile_threshold = depth_percentile_threshold
    63         2          0.0      0.0      0.0          self.depth_thresholds = []
    64       200          0.0      0.0      0.0          for camera_idx in range(self.n_cameras):
    65       200          0.3      0.0      0.9              depths = self.h5_buffers[f"camera{camera_idx}"]["depth"]["0"][:]
    66       200          0.1      0.0      0.1              depth_threshold = np.percentile(
    67       200          0.0      0.0      0.0                  depths[depths != 0],
    68       200          0.0      0.0      0.0                  self.depth_percentile_threshold,
    69                                                       )
    70       200          0.0      0.0      0.0              self.depth_thresholds.append(depth_threshold)
    71         1          0.0      0.0      0.0          if split == "train":
    72         1         35.9     35.9     98.9              self.preload_data()

Total time: 4.20073 s
File: /workspaces/DyNeRFactor/datamodule/dataset_dynerf.py
Function: preload_data at line 74

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    74                                               @profile
    75                                               def preload_data(self):
    76                                                   # Crear matrices vacías con dimensiones predefinidas
    77         1          0.0      0.0      0.0          all_ray_o_and_d = np.empty(
    78         1          0.0      0.0      0.0              (self.n_cameras, np.prod(self.img_wh), 6), dtype=np.float16
    79                                                   )
    80         1          0.0      0.0      0.0          all_rgbs = np.empty(
    81         1          0.0      0.0      0.0              (self.n_cameras, self.num_frames, np.prod(self.img_wh), 3), dtype=np.float16
    82                                                   )
    83         1          0.0      0.0      0.0          all_normals = np.empty(
    84         1          0.0      0.0      0.0              (self.n_cameras, self.num_frames, np.prod(self.img_wh), 3), dtype=np.float16
    85                                                   )
    86         1          0.0      0.0      0.0          all_depths = np.empty(
    87         1          0.0      0.0      0.0              (self.n_cameras, self.num_frames, np.prod(self.img_wh), 1), dtype=np.float16
    88                                                   )
    89         1          0.0      0.0      0.0          all_alphas = np.empty(
    90         1          0.0      0.0      0.0              (self.n_cameras, self.num_frames, np.prod(self.img_wh), 1), dtype=np.float16
    91                                                   )
    92         1          0.0      0.0      0.0          all_foreground_indices = []
    93         1          0.0      0.0      0.0          n_lights = np.shape(self.h5_buffers["camera0"]["light_visibility"]["0"])[1]
    94         1          0.0      0.0      0.0          all_light_vis = np.empty(
    95         1          0.0      0.0      0.0              (self.n_cameras, self.num_frames, np.prod(self.img_wh), n_lights),
    96         1          0.0      0.0      0.0              dtype=np.float16,
    97                                                   )
    98                                                   # Iterar sobre el número de cámaras
    99       100          0.0      0.0      0.8          for camera_idx in tqdm(range(self.n_cameras), desc="preloading data"):
   100                                                       # pre-load rays
   101       100          0.4      0.0      9.3              rays = self.h5_dataset["dataset"][f"camera{camera_idx}"]["rays"][:]
   102       100          0.1      0.0      2.7              all_ray_o_and_d[camera_idx] = rays
   103       100          0.0      0.0      0.0              all_foreground_indices.append([])
   104       100          0.0      0.0      0.9              frames_in_buffers = list(
   105       100          0.0      0.0      0.4                  self.h5_buffers[f"camera{camera_idx}"]["depth"].keys()
   106                                                       )
   107       100          0.0      0.0      0.0              frames_in_buffers = [int(f) for f in frames_in_buffers]
   108      1000          0.0      0.0      0.0              for frame_idx in frames_in_buffers:
   109                                                           # Cargar la matriz de profundidad correspondiente a la cámara actual y el fotograma actual
   110      1000          1.1      0.0     25.0                  depths = self.h5_buffers[f"camera{camera_idx}"]["depth"][
   111      1000          0.0      0.0      0.0                      str(frame_idx)
   112                                                           ][:]
   113                                                           # Concatenar la matriz de profundidades a la matriz de profundidades generales
   114      1000          0.2      0.0      5.3                  all_depths[camera_idx, frame_idx] = depths.reshape(-1, 1)
   115                                                           # Calcular el umbral de profundidad para el fotograma actual
   116      1000          0.4      0.0      8.5                  depth_threshold = np.percentile(
   117      1000          0.1      0.0      2.0                      depths[depths != 0], self.depth_percentile_threshold
   118                                                           )
   119                                                           # Calcular la máscara binaria para los píxeles en primer plano para el fotograma actual
   120      1000          0.0      0.0      0.5                  alphas = np.logical_and(depths < depth_threshold, depths != 0)
   121                                                           # Asegurarse de que haya píxeles en primer plano para la cámara y el fotograma actual
   122      1000          0.0      0.0      0.0                  assert (
   123      1000          0.0      0.0      0.7                      not alphas.sum() == 0
   124                                                           ), f"no foreground pixels found for camera {camera_idx} and frame {frame_idx}"
   125                                                           # Concatenar la matriz de máscaras binarias a la matriz de máscaras binarias generales
   126      1000          0.2      0.0      5.6                  all_alphas[camera_idx, frame_idx] = alphas.reshape(-1, 1)
   127                                                           # Agregar el índice de los píxeles en primer plano para el fotograma actual a la lista correspondiente
   128      1000          0.1      0.0      2.2                  all_foreground_indices[camera_idx].append(np.argwhere(alphas).flatten())
   129                                                           # Concatenar la matriz de imágenes RGB a la matriz de imágenes RGB generales
   130      2000          0.1      0.0      1.7                  all_rgbs[camera_idx, frame_idx] = self.h5_dataset["dataset"][
   131      1000          0.0      0.0      0.0                      f"camera{camera_idx}"
   132      2000          0.0      0.0      0.0                  ]["rgbs"][f"rgbs_frame_{frame_idx}"][:]
   133                                                           # Concatenar la matriz de normales a la matriz de normales generales
   134      2000          0.1      0.0      2.6                  all_normals[camera_idx, frame_idx] = self.h5_buffers[
   135      1000          0.0      0.0      0.0                      f"camera{camera_idx}"
   136      2000          0.0      0.0      0.0                  ]["normals"][str(frame_idx)][:]
   137                                                           # Concatenar la matriz de visibilidad de la luz a la matriz de visibilidad de la luz
   138      1000          0.0      0.0      0.1                  if self.use_light_visibility_buffer:
   139                                                               # Obtener los índices de los fotogramas almacenados en el búfer de visibilidad de la luz para la cámara actual
   140      1000          0.2      0.0      3.9                      frames_in_buffers = list(
   141      1000          0.1      0.0      1.9                          self.h5_buffers[f"camera{camera_idx}"][
   142      1000          0.0      0.0      0.0                              "light_visibility"
   143                                                                   ].keys()
   144                                                               )
   145      1000          0.0      0.0      0.2                      frames_in_buffers = [int(f) for f in frames_in_buffers]
   146                                                               # Iterar sobre los fotogramas en el búfer de visibilidad de la luz
   147     10000          0.0      0.0      0.4                      for frame_idx in frames_in_buffers:
   148                                                                   # Concatenar la matriz de visibilidad de la luz a la matriz de visibilidad de la luz generales
   149     20000          1.0      0.0     24.0                          all_light_vis[camera_idx, frame_idx] = self.h5_buffers[
   150     10000          0.0      0.0      0.5                              f"camera{camera_idx}"
   151     20000          0.0      0.0      0.5                          ]["light_visibility"][str(frame_idx)][:]
   152                                           
   153                                                       # Convertir la lista de índices de primer plano en una matriz numpy
   154         1          0.0      0.0      0.0          all_foreground_indices = all_foreground_indices
   155                                                   # Reorganizar las dimensiones de las matrices numpy
   156                                           
   157                                                   # Almacenar las matrices numpy en el objeto de la clase para su uso posterior
   158         1          0.0      0.0      0.0          self.all_ray_o_and_d = all_ray_o_and_d
   159         1          0.0      0.0      0.0          self.all_rgbs = all_rgbs
   160         1          0.0      0.0      0.0          self.all_normals = all_normals
   161         1          0.0      0.0      0.0          self.all_depths = all_depths
   162         1          0.0      0.0      0.0          self.all_alphas = all_alphas
   163         1          0.0      0.0      0.0          self.all_foreground_indices = all_foreground_indices
   164         1          0.0      0.0      0.0          if self.use_light_visibility_buffer:
   165         1          0.0      0.0      0.0              self.all_light_vis = all_light_vis