Forward

        # Excerpt of a single training step: forward pass, loss scaling,
        # backward pass, optional L1 sparsity penalty on BatchNorm2d parameters,
        # then the optimizer step. The AMP calls (autocast / scaler) are
        # commented out in this excerpt, and debug prints of the first layer's
        # BN weight are placed around the update.
        # with amp.autocast(enabled=cuda):
        # Debug: dump the first layer's BN scale before the update.
        print("model.model[0].bn.weight before:")
        print(model.model[0].bn.weight)
        pred = model(imgs)  # forward
        loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
        if RANK != -1:
            loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
        if opt.quad:
            # Loss is multiplied by 4 when the quad option is set.
            loss *= 4.

        # Backward
        # scaler.scale(loss).backward()
        # Plain backward on the unscaled loss (the GradScaler call above is disabled).
        loss.backward()

        # # ============================= sparsity training ========================== #
        # Sparsity coefficient decays linearly with the epoch:
        # opt.sr at epoch 0, down to 0.1 * opt.sr when epoch == epochs.
        # It is computed even when opt.st is falsy, but only used inside the branch.
        srtmp = opt.sr * (1 - 0.9 * epoch / epochs)
        if opt.st:
            # The code that would populate this list is commented out below, so the
            # list stays empty and every BatchNorm2d module receives the penalty.
            ignore_bn_list = []
            for k, m in model.named_modules():
                # print(k + "---" + str(type(m)))
                # if isinstance(m, Bottleneck):
                #     if m.add:
                #         ignore_bn_list.append(k.rsplit(".", 2)[0] + ".cv1.bn")
                #         ignore_bn_list.append(k + '.cv1.bn')
                #         ignore_bn_list.append(k + '.cv2.bn')
                if isinstance(m, nn.BatchNorm2d) and (k not in ignore_bn_list):
                    # Add coefficient * sign(param) in place to the gradients produced
                    # by backward() (the subgradient of an L1 penalty on the parameter).
                    # The weight uses the decayed srtmp; the bias uses the fixed opt.sr*10.
                    # NOTE(review): assumes .grad is populated for every BatchNorm2d
                    # parameter; a parameter without a gradient would have grad None
                    # here - TODO confirm no BN layers are frozen.
                    m.weight.grad.data.add_(srtmp * torch.sign(m.weight.data))  # L1
                    m.bias.grad.data.add_(opt.sr*10 * torch.sign(m.bias.data))  # L1
        # # ============================= sparsity training ========================== #

        # Optimize
        # if ni - last_opt_step >= accumulate:
        # Debug: gradient of the first BN scale after backward() and after the
        # optional sparsity adjustment above.
        print("model.model[0].bn.weight.grad after:")
        print(model.model[0].bn.weight.grad)
        # Unconditional step: the gradient-accumulation guard above is commented out.
        # NOTE(review): no optimizer.zero_grad() is visible in this excerpt -
        # presumably it is called elsewhere in the loop; verify, otherwise
        # gradients accumulate across iterations.
        optimizer.step()
        # scaler.step(optimizer)  # optimizer.step
        # scaler.update()
        # Debug: dump the same BN scale after the update.
        # NOTE(review): print() shows a limited number of decimals. With gradients
        # around 1e-4 (as in the dump pasted with this report) and a small learning
        # rate (not visible here), the change may be too small to show up in the
        # printed values - confirm by comparing against a clone taken before step().
        print("model.model[0].bn.weight after:")
        print(model.model[0].bn.weight)

Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True) model.model[0].bn.weight.grad after: tensor([ 1.75973e-04, -1.24160e-03, 7.71124e-04, 1.68284e-04, -3.08524e-05, 3.52873e-05, 2.71649e-04, -6.97412e-05, 1.16599e-03, -1.32226e-04, -3.44711e-04, -1.37540e-04, -1.27639e-04, 2.16433e-04, -1.90370e-04, -3.67745e-04, 1.30074e-04, 4.84745e-04, 5.60192e-04, 3.46283e-04, 1.48443e-04, 3.68081e-04, -1.89473e-05, 2.38092e-04, -2.50304e-04, 8.99445e-04, -2.93946e-04, 4.59142e-04, 9.26678e-04, 2.34621e-04, 5.76800e-06, -9.57086e-05, -1.17953e-04, -3.16530e-04, 2.24184e-04, 3.30295e-04, 3.42924e-04, -6.62573e-05, 3.08914e-04, -2.62792e-04, 6.73363e-05, -8.41171e-05, 2.21043e-04, -6.37600e-06, 2.59514e-05, -8.04854e-04, 4.55177e-04, 8.46961e-04, -3.30701e-04, 6.08296e-04, 1.40503e-04, -1.21659e-04, 4.38148e-04, -2.28777e-05, 6.35203e-04, -2.14394e-04, -2.74256e-04, -5.64065e-04, 1.24003e-03, -3.57309e-05, 8.90109e-05, -4.77516e-04, 1.92485e-04, 2.45803e-04], device='cuda:0') model.model[0].bn.weight after: Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 
2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True)

midasklr / yolov5prune

step后权值没有更新 #122

Forward