I am profiling code in which function_a calls function_b exactly once. The time attributed to the function_b call on its line inside function_a matches the real (wall-clock) time I measure externally, yet the total time reported when profiling function_b itself is much smaller than that:
Here is the profiling result, where function_a is the init() function and function_b is preload_data():
Timer unit: 1 s
Total time: 36.3447 s
File: /workspaces/DyNeRFactor/datamodule/dataset_dynerf.py
Function: __init__ at line 17
Line # Hits Time Per Hit % Time Line Contents
==============================================================
17 @profile
18 def __init__(
19 self,
20 h5_buffers_path,
21 h5_dataset_path,
22 use_light_visibility_buffer=False,
23 num_frames=1,
24 split="train",
25 batch_size=1,
26 num_val_cameras=1,
27 num_val_frames=1,
28 depth_percentile_threshold=100,
29 ):
30 2 0.0 0.0 0.0 self.h5_buffers_path = h5_buffers_path
31 2 0.0 0.0 0.0 self.h5_dataset_path = h5_dataset_path
32 2 0.0 0.0 0.0 self.use_light_visibility_buffer = use_light_visibility_buffer
33 2 0.0 0.0 0.0 self.num_frames = num_frames
34 2 0.0 0.0 0.0 self.batch_size = batch_size
35 2 0.0 0.0 0.0 self.split = split
36 2 0.0 0.0 0.0 self.num_val_cameras = num_val_cameras
37 2 0.0 0.0 0.0 self.num_val_frames = num_val_frames
38 # open both h5 files
39 2 0.0 0.0 0.0 self.h5_buffers = h5py.File(self.h5_buffers_path, "r")
40 2 0.0 0.0 0.0 self.h5_dataset = h5py.File(self.h5_dataset_path, "r")
41 # check that light visibility buffer is present if needed
42 2 0.0 0.0 0.0 assert not (
43 2 0.0 0.0 0.0 self.use_light_visibility_buffer
44 and (
45 2 0.0 0.0 0.0 len(self.h5_buffers["camera0"]["light_visibility"]) == 0
46 2 0.0 0.0 0.0 or "light_visibility" not in self.h5_buffers["camera0"].keys()
47 )
48 ), "light visibility buffer not present in h5 file and use_light_visibility_buffer is True"
49 # check number of frames makes sense
50 2 0.0 0.0 0.0 assert (
51 2 0.0 0.0 0.0 self.num_frames <= self.h5_dataset["dataset"].attrs["n_frames"]
52 ), "num_frames is larger than the number of frames in the dataset"
53 2 0.0 0.0 0.0 assert self.num_frames > 0, "num_frames must be larger than 0"
54 2 0.0 0.0 0.0 assert self.h5_dataset["dataset"].attrs["n_frames"] == len(
55 2 0.0 0.0 0.0 self.h5_dataset["dataset"]["camera0"]["rgbs"]
56 ), "attr num_frames in the dataset does not match the actual number of frames in the dataset"
57 # set n_cameras
58 2 0.0 0.0 0.0 self.n_cameras = self.h5_dataset["dataset"].attrs["n_cameras"]
59 2 0.0 0.0 0.0 self.img_wh = self.h5_dataset["dataset"].attrs["img_wh"]
60 2 0.0 0.0 0.0 self.train_cameras = list(range(self.num_val_cameras, self.n_cameras))
61 2 0.0 0.0 0.0 self.val_cameras = list(range(self.num_val_cameras))
62 2 0.0 0.0 0.0 self.depth_percentile_threshold = depth_percentile_threshold
63 2 0.0 0.0 0.0 self.depth_thresholds = []
64 200 0.0 0.0 0.0 for camera_idx in range(self.n_cameras):
65 200 0.3 0.0 0.9 depths = self.h5_buffers[f"camera{camera_idx}"]["depth"]["0"][:]
66 200 0.1 0.0 0.1 depth_threshold = np.percentile(
67 200 0.0 0.0 0.0 depths[depths != 0],
68 200 0.0 0.0 0.0 self.depth_percentile_threshold,
69 )
70 200 0.0 0.0 0.0 self.depth_thresholds.append(depth_threshold)
71 1 0.0 0.0 0.0 if split == "train":
72 1 35.9 35.9 98.9 self.preload_data()
Total time: 4.20073 s
File: /workspaces/DyNeRFactor/datamodule/dataset_dynerf.py
Function: preload_data at line 74
Line # Hits Time Per Hit % Time Line Contents
==============================================================
74 @profile
75 def preload_data(self):
76 # Crear matrices vacías con dimensiones predefinidas
77 1 0.0 0.0 0.0 all_ray_o_and_d = np.empty(
78 1 0.0 0.0 0.0 (self.n_cameras, np.prod(self.img_wh), 6), dtype=np.float16
79 )
80 1 0.0 0.0 0.0 all_rgbs = np.empty(
81 1 0.0 0.0 0.0 (self.n_cameras, self.num_frames, np.prod(self.img_wh), 3), dtype=np.float16
82 )
83 1 0.0 0.0 0.0 all_normals = np.empty(
84 1 0.0 0.0 0.0 (self.n_cameras, self.num_frames, np.prod(self.img_wh), 3), dtype=np.float16
85 )
86 1 0.0 0.0 0.0 all_depths = np.empty(
87 1 0.0 0.0 0.0 (self.n_cameras, self.num_frames, np.prod(self.img_wh), 1), dtype=np.float16
88 )
89 1 0.0 0.0 0.0 all_alphas = np.empty(
90 1 0.0 0.0 0.0 (self.n_cameras, self.num_frames, np.prod(self.img_wh), 1), dtype=np.float16
91 )
92 1 0.0 0.0 0.0 all_foreground_indices = []
93 1 0.0 0.0 0.0 n_lights = np.shape(self.h5_buffers["camera0"]["light_visibility"]["0"])[1]
94 1 0.0 0.0 0.0 all_light_vis = np.empty(
95 1 0.0 0.0 0.0 (self.n_cameras, self.num_frames, np.prod(self.img_wh), n_lights),
96 1 0.0 0.0 0.0 dtype=np.float16,
97 )
98 # Iterar sobre el número de cámaras
99 100 0.0 0.0 0.8 for camera_idx in tqdm(range(self.n_cameras), desc="preloading data"):
100 # pre-load rays
101 100 0.4 0.0 9.3 rays = self.h5_dataset["dataset"][f"camera{camera_idx}"]["rays"][:]
102 100 0.1 0.0 2.7 all_ray_o_and_d[camera_idx] = rays
103 100 0.0 0.0 0.0 all_foreground_indices.append([])
104 100 0.0 0.0 0.9 frames_in_buffers = list(
105 100 0.0 0.0 0.4 self.h5_buffers[f"camera{camera_idx}"]["depth"].keys()
106 )
107 100 0.0 0.0 0.0 frames_in_buffers = [int(f) for f in frames_in_buffers]
108 1000 0.0 0.0 0.0 for frame_idx in frames_in_buffers:
109 # Cargar la matriz de profundidad correspondiente a la cámara actual y el fotograma actual
110 1000 1.1 0.0 25.0 depths = self.h5_buffers[f"camera{camera_idx}"]["depth"][
111 1000 0.0 0.0 0.0 str(frame_idx)
112 ][:]
113 # Concatenar la matriz de profundidades a la matriz de profundidades generales
114 1000 0.2 0.0 5.3 all_depths[camera_idx, frame_idx] = depths.reshape(-1, 1)
115 # Calcular el umbral de profundidad para el fotograma actual
116 1000 0.4 0.0 8.5 depth_threshold = np.percentile(
117 1000 0.1 0.0 2.0 depths[depths != 0], self.depth_percentile_threshold
118 )
119 # Calcular la máscara binaria para los píxeles en primer plano para el fotograma actual
120 1000 0.0 0.0 0.5 alphas = np.logical_and(depths < depth_threshold, depths != 0)
121 # Asegurarse de que haya píxeles en primer plano para la cámara y el fotograma actual
122 1000 0.0 0.0 0.0 assert (
123 1000 0.0 0.0 0.7 not alphas.sum() == 0
124 ), f"no foreground pixels found for camera {camera_idx} and frame {frame_idx}"
125 # Concatenar la matriz de máscaras binarias a la matriz de máscaras binarias generales
126 1000 0.2 0.0 5.6 all_alphas[camera_idx, frame_idx] = alphas.reshape(-1, 1)
127 # Agregar el índice de los píxeles en primer plano para el fotograma actual a la lista correspondiente
128 1000 0.1 0.0 2.2 all_foreground_indices[camera_idx].append(np.argwhere(alphas).flatten())
129 # Concatenar la matriz de imágenes RGB a la matriz de imágenes RGB generales
130 2000 0.1 0.0 1.7 all_rgbs[camera_idx, frame_idx] = self.h5_dataset["dataset"][
131 1000 0.0 0.0 0.0 f"camera{camera_idx}"
132 2000 0.0 0.0 0.0 ]["rgbs"][f"rgbs_frame_{frame_idx}"][:]
133 # Concatenar la matriz de normales a la matriz de normales generales
134 2000 0.1 0.0 2.6 all_normals[camera_idx, frame_idx] = self.h5_buffers[
135 1000 0.0 0.0 0.0 f"camera{camera_idx}"
136 2000 0.0 0.0 0.0 ]["normals"][str(frame_idx)][:]
137 # Concatenar la matriz de visibilidad de la luz a la matriz de visibilidad de la luz
138 1000 0.0 0.0 0.1 if self.use_light_visibility_buffer:
139 # Obtener los índices de los fotogramas almacenados en el búfer de visibilidad de la luz para la cámara actual
140 1000 0.2 0.0 3.9 frames_in_buffers = list(
141 1000 0.1 0.0 1.9 self.h5_buffers[f"camera{camera_idx}"][
142 1000 0.0 0.0 0.0 "light_visibility"
143 ].keys()
144 )
145 1000 0.0 0.0 0.2 frames_in_buffers = [int(f) for f in frames_in_buffers]
146 # Iterar sobre los fotogramas en el búfer de visibilidad de la luz
147 10000 0.0 0.0 0.4 for frame_idx in frames_in_buffers:
148 # Concatenar la matriz de visibilidad de la luz a la matriz de visibilidad de la luz generales
149 20000 1.0 0.0 24.0 all_light_vis[camera_idx, frame_idx] = self.h5_buffers[
150 10000 0.0 0.0 0.5 f"camera{camera_idx}"
151 20000 0.0 0.0 0.5 ]["light_visibility"][str(frame_idx)][:]
152
153 # Convertir la lista de índices de primer plano en una matriz numpy
154 1 0.0 0.0 0.0 all_foreground_indices = all_foreground_indices
155 # Reorganizar las dimensiones de las matrices numpy
156
157 # Almacenar las matrices numpy en el objeto de la clase para su uso posterior
158 1 0.0 0.0 0.0 self.all_ray_o_and_d = all_ray_o_and_d
159 1 0.0 0.0 0.0 self.all_rgbs = all_rgbs
160 1 0.0 0.0 0.0 self.all_normals = all_normals
161 1 0.0 0.0 0.0 self.all_depths = all_depths
162 1 0.0 0.0 0.0 self.all_alphas = all_alphas
163 1 0.0 0.0 0.0 self.all_foreground_indices = all_foreground_indices
164 1 0.0 0.0 0.0 if self.use_light_visibility_buffer:
165 1 0.0 0.0 0.0 self.all_light_vis = all_light_vis
I am profiling code in which function_a calls function_b exactly once. The time attributed to the function_b call on its line inside function_a matches the real (wall-clock) time I measure externally, yet the total time reported when profiling function_b itself is much smaller than that. Here is the profiling result, where function_a is the init() function and function_b is preload_data():