Closed 1437513221 closed 1 year ago
# with amp.autocast(enabled=cuda): print("model.model[0].bn.weight before:") print(model.model[0].bn.weight) pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. # Backward # scaler.scale(loss).backward() loss.backward() # # ============================= sparsity training ========================== # srtmp = opt.sr * (1 - 0.9 * epoch / epochs) if opt.st: ignore_bn_list = [] for k, m in model.named_modules(): # print(k + "---" + str(type(m))) # if isinstance(m, Bottleneck): # if m.add: # ignore_bn_list.append(k.rsplit(".", 2)[0] + ".cv1.bn") # ignore_bn_list.append(k + '.cv1.bn') # ignore_bn_list.append(k + '.cv2.bn') if isinstance(m, nn.BatchNorm2d) and (k not in ignore_bn_list): m.weight.grad.data.add_(srtmp * torch.sign(m.weight.data)) # L1 m.bias.grad.data.add_(opt.sr*10 * torch.sign(m.bias.data)) # L1 # # ============================= sparsity training ========================== # # Optimize # if ni - last_opt_step >= accumulate: print("model.model[0].bn.weight.grad after:") print(model.model[0].bn.weight.grad) optimizer.step() # scaler.step(optimizer) # optimizer.step # scaler.update() print("model.model[0].bn.weight after:") print(model.model[0].bn.weight)
Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True) model.model[0].bn.weight.grad after: tensor([ 1.75973e-04, -1.24160e-03, 7.71124e-04, 1.68284e-04, -3.08524e-05, 3.52873e-05, 2.71649e-04, -6.97412e-05, 1.16599e-03, -1.32226e-04, -3.44711e-04, -1.37540e-04, -1.27639e-04, 2.16433e-04, -1.90370e-04, -3.67745e-04, 1.30074e-04, 4.84745e-04, 5.60192e-04, 3.46283e-04, 1.48443e-04, 3.68081e-04, -1.89473e-05, 2.38092e-04, -2.50304e-04, 8.99445e-04, -2.93946e-04, 4.59142e-04, 9.26678e-04, 2.34621e-04, 5.76800e-06, -9.57086e-05, -1.17953e-04, -3.16530e-04, 2.24184e-04, 3.30295e-04, 3.42924e-04, -6.62573e-05, 3.08914e-04, -2.62792e-04, 6.73363e-05, -8.41171e-05, 2.21043e-04, -6.37600e-06, 2.59514e-05, -8.04854e-04, 4.55177e-04, 8.46961e-04, -3.30701e-04, 6.08296e-04, 1.40503e-04, -1.21659e-04, 4.38148e-04, -2.28777e-05, 6.35203e-04, -2.14394e-04, -2.74256e-04, -5.64065e-04, 1.24003e-03, -3.57309e-05, 8.90109e-05, -4.77516e-04, 1.92485e-04, 2.45803e-04], device='cuda:0') model.model[0].bn.weight after: Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 
2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True)
是我自己的问题,已经解决了。(It was my own mistake; it has been resolved.)
Forward
Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True) model.model[0].bn.weight.grad after: tensor([ 1.75973e-04, -1.24160e-03, 7.71124e-04, 1.68284e-04, -3.08524e-05, 3.52873e-05, 2.71649e-04, -6.97412e-05, 1.16599e-03, -1.32226e-04, -3.44711e-04, -1.37540e-04, -1.27639e-04, 2.16433e-04, -1.90370e-04, -3.67745e-04, 1.30074e-04, 4.84745e-04, 5.60192e-04, 3.46283e-04, 1.48443e-04, 3.68081e-04, -1.89473e-05, 2.38092e-04, -2.50304e-04, 8.99445e-04, -2.93946e-04, 4.59142e-04, 9.26678e-04, 2.34621e-04, 5.76800e-06, -9.57086e-05, -1.17953e-04, -3.16530e-04, 2.24184e-04, 3.30295e-04, 3.42924e-04, -6.62573e-05, 3.08914e-04, -2.62792e-04, 6.73363e-05, -8.41171e-05, 2.21043e-04, -6.37600e-06, 2.59514e-05, -8.04854e-04, 4.55177e-04, 8.46961e-04, -3.30701e-04, 6.08296e-04, 1.40503e-04, -1.21659e-04, 4.38148e-04, -2.28777e-05, 6.35203e-04, -2.14394e-04, -2.74256e-04, -5.64065e-04, 1.24003e-03, -3.57309e-05, 8.90109e-05, -4.77516e-04, 1.92485e-04, 2.45803e-04], device='cuda:0') model.model[0].bn.weight after: Parameter containing: tensor([2.11914, 1.73828, 2.18555, 2.50000, 1.11426, 1.24609, 2.95312, 0.81934, 2.17578, 2.25586, 2.52344, 3.11523, 1.61328, 2.10156, 1.76855, 2.09570, 2.92188, 1.17383, 2.30664, 2.28711, 2.54102, 2.95703, 3.06055, 3.59961, 1.96582, 2.20703, 3.08008, 1.06055, 1.71777, 2.69141, 2.49805, 2.15430, 2.83594, 2.56445, 1.84277, 2.61719, 1.31250, 2.11328, 2.40625, 0.84619, 3.39844, 
2.44336, 1.44629, 1.21973, 2.59570, 1.91211, 2.33594, 2.42188, 2.52344, 3.29883, 1.27246, 2.21484, 1.61328, 1.17578, 1.77734, 1.77344, 2.51367, 3.05273, 1.34277, 2.59961, 3.46875, 1.40723, 3.00781, 2.42383], device='cuda:0', requires_grad=True)