Open Xiantai01 opened 1 month ago
您好,好像看不到您上传的图片
使用MTL-Aligned的时候,求梯度时,比如torch.autograd.grad(fusion_loss, list(fusion.parameters())),loss对模型参数的梯度一直是0。
请问您能提供更详细的调试代码截图吗
def after_train_iter(self, runner):
    """Run one balanced multi-task update step after a training iteration.

    Clears the optimizer's gradients, optionally runs the anomalous
    parameter check, gathers the shared and task-specific parameter
    groups from the wrapped model, and passes them together with the
    loss dict to ``self.balancer.step_with_model``.

    Args:
        runner: Training runner. This method reads ``runner.optimizer``,
            ``runner.outputs['loss']``, ``runner.model.module`` and
            ``runner.iter``.
    """
    runner.optimizer.zero_grad()
    if self.detect_anomalous_params:
        self.detect_anomalous_parameters(runner.outputs['loss'], runner)

    net = runner.model.module

    # Shared parameters: every trainable parameter of the backbone,
    # followed by every trainable parameter of the neck.
    shared_parameter = [
        param
        for shared_module in (net.backbone, net.neck)
        for param in list(shared_module.parameters())
        if param.requires_grad
    ]

    # Task-specific parameters: task '0' owns the fusion module, task '1'
    # owns the detection heads (roi_head first, then rpn_head).
    roi_head_params = list(net.roi_head.parameters())
    rpn_head_params = list(net.rpn_head.parameters())
    task_specific_params = {
        '0': list(net.fusion.parameters()),
        '1': roi_head_params + rpn_head_params,
    }

    loss_dict = runner.outputs['loss']

    # Debug probe: gradient of the fusion loss w.r.t. the roi_head
    # parameters. The result is not used below; it exists only for
    # inspection in a debugger.
    # NOTE(review): with allow_unused=True, torch.autograd.grad returns
    # None for inputs that are not part of the loss's graph. Presumably
    # fusion_loss does not depend on roi_head, so probing
    # net.fusion.parameters() may be the intended check. TODO confirm.
    roi_head_grads = torch.autograd.grad(
        loss_dict['fusion_loss'],
        roi_head_params,
        retain_graph=True,
        allow_unused=True,
    )

    # 'acc' is removed from the loss dict before it is handed to the
    # balancer (presumably because it is a metric, not a loss; verify).
    del loss_dict['acc']

    self.balancer.step_with_model(
        losses=loss_dict,
        shared_params=shared_parameter,
        task_specific_params=task_specific_params,
        last_shared_layer_params=None,
        iter=runner.iter,
    )
![Uploading 屏幕截图 2024-10-13 205351.png…]()