RuntimeError: CUDA error: device-side assert triggered

The code: import argparse import os, sys import shutil import time from pathlib import Path import imageio

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(file))) sys.path.append(BASE_DIR)

import torch import torch.nn as nn from torch.autograd import Variable from torchvision import datasets, transforms import numpy as np from lib.models import get_net,YOLOP_only_driveareaSeg,YOLOP_slim_only_driveareaSeg from lib.config import cfg from lib.utils.utils import create_logger, select_device, time_synchronized import shutil

from lib.models.common import (SPP, Bottleneck, BottleneckCSP, Concat, Conv, Detect, Focus, SharpenConv, modifeid_DepthSeperabelConv2d, modified_BottleneckCSP, modified_Conv, modified_SharpenConv, modified_SPP)

def prune(cfg,opt):

## creat logger
logger, _, _ = create_logger(
    cfg, cfg.LOG_DIR, 'prune')
device = select_device(logger,opt.device)
if os.path.exists(opt.save_dir):  # output dir
    shutil.rmtree(opt.save_dir)  # delete dir
os.makedirs(opt.save_dir)  # make new dir
## generate model 
model = get_net(cfg,m_block_cfg=YOLOP_only_driveareaSeg)
## load weights
print(opt.weights)
checkpoint = torch.load(opt.weights,map_location= device)
model.load_state_dict(checkpoint['state_dict'])
model = model.to(device)

# print(model)

'''
1. 获取所有特征图的gamma值,存入一个list当中
2. 对gamma值进行排序
'''

total = 0 # 每层特征图个数 总和(计算一下整个网络中有多少网络)

for m in model.modules():

    if isinstance(m, modified_Conv):
        # print(m)
        total += m.bn.weight.data.shape[0]

bn = torch.zeros(total) # 拿到每一个gamma值 每个特征图都会对应一个γ、β
index = 0
for m in model.modules():
    if isinstance(m, modified_Conv):
        # 把 gammma 值都存入 bn中,按照索引存, 比如 index1:index1+size 存bn1的gamma
        size = m.bn.weight.data.shape[0]
        bn[index:(index+size)] = m.bn.weight.data.abs().clone()
        index += size

## 对gamma值进行排序
y, i = torch.sort(bn)
## 计算阈值
thre_index = int(total * opt.percent)
## 截取到哪一个索引
thre = y[thre_index]

pruned = 0
cfg_list = []
cfg_mask = []

for k, m in enumerate(model.modules()):
    if isinstance(m, modified_Conv):# 如果是一个BatchNorm2d层
        weight_copy = m.bn.weight.data.clone()#把权重拷贝

        #.gt 比较前者是否大于后者: 即当前的权重参数是否大于阈值
        # mask里的元素是0或1的, 当前排序后gamma的索引大于阈值,返回1,小于0
        mask = weight_copy.abs().gt(thre).float().cuda() 
        # pruned剪枝要剪去多少个, 初始值是0个
        pruned = pruned + mask.shape[0] - torch.sum(mask)
        # 通过和mask做乘法把实际权重值置0
        m.bn.weight.data.mul_(mask) # BN层gamma置0
        m.bn.bias.data.mul_(mask) 
        # cfg是一个list,比如它的第一个元素是34,即第一个卷积层保留了34个特征图
        cfg_list.append(int(torch.sum(mask)))
        cfg_mask.append(mask.clone())
        print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
            format(k, mask.shape[0], int(torch.sum(mask))))
    elif isinstance(m, nn.MaxPool2d):
        cfg_list.append('M')

pruned_ratio = pruned/total

print('Pre-processing Successful!')
print("cfg_list:",cfg_list)
print(len(cfg_list))
print(pruned_ratio )
# Make real prune

newmodel = get_net(cfg,m_block_cfg=YOLOP_slim_only_driveareaSeg)   ##  # 剪枝后的模型
newmodel = newmodel.to(device)
num_parameters = sum([param.nelement() for param in newmodel.parameters()])
savepath = os.path.join(opt.save_dir, "prune.txt")
with open(savepath, "w") as fp:
    fp.write("Configuration: \n"+str(cfg_list)+"\n")
    fp.write("Number of parameters: \n"+str(num_parameters)+"\n")

layer_id_in_cfg = 0       ##  为剪枝后的模型赋值权重
start_mask = torch.ones(3)  #当前block的输入(在BN层更新,当前的输出是下一个block的输入)
end_mask = cfg_mask[layer_id_in_cfg] #当前block的输出(从训练得到的配置中得到)
for [m0, m1] in zip(model.modules(), newmodel.modules()):
    if isinstance(m0,modified_Conv):
        idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy()))) # 赋值
        m1.bn.weight.data = m0.bn.weight.data[idx1].clone()
        m1.bn.bias.data = m0.bn.bias.data[idx1].clone()
        m1.bn.running_mean = m0.bn.running_mean[idx1].clone()
        m1.bn.running_var = m0.bn.running_var[idx1].clone()
        layer_id_in_cfg += 1
        start_mask = end_mask.clone() #下一层的
        if layer_id_in_cfg < len(cfg_mask):  # do not change in Final FC
            end_mask = cfg_mask[layer_id_in_cfg] #输出

        idx0_1 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        idx1_1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
        print('In shape: {:d} Out shape:{:d}'.format(idx0_1.shape[0], idx1_1.shape[0]))
        if idx0_1.size == 1:
            idx0_1 = np.resize(idx0_1, (1,))
        if idx1_1.size == 1:
            idx1_1 = np.resize(idx1_1, (1,))
        ## # 注意卷积核Tensor维度为[n, c, w, h]，两个卷积层连接，下一层的输入维度n就等于当前层的c
        w1 = m0.conv.weight.data[:, idx0_1.tolist(), :, :].clone() #拿到原始训练好权重
        w1 = w1[idx1_1.tolist(), :, :, :].clone() # 根据筛选出的特征图的id,拷贝需要的权重
        m1.conv.weight.data = w1.clone() # 将所需权重赋值到剪枝后的模型
        # m1.bias.data = m0.bias.data[idx1].clone()

    elif isinstance(m0, nn.Linear):
        idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        # m0.weight.data 是 (10,512), 即每个特征图是长度为10的一维向量,现在按照idx0的索引取出需要的模型
        m1.weight.data = m0.weight.data[:, idx0].clone()

# 保存剪枝后的模型

torch.save({'cfg_list': cfg_list, 'state_dict': newmodel.state_dict()}, opt.save_dir)

The error:

Pre-processing Successful! cfg_list: [14, 35, 24, 28, 16, 11, 72, 37, 92, 31, 23, 30, 30, 32, 42, 138, 33, 124, 65, 55, 60, 84, 60, 120, 272, 124, 247, 'M', 125, 226, 120, 110, 149, 64, 123, 61, 73, 19, 70, 17, 32, 12, 15, 13, 11, 3, 6, 2, 2, 2] 50 tensor(0.5002, device='cuda:0') In shape: 14 Out shape:35 Traceback (most recent call last): File "/media/new_data4/.conda/envs/torch171/lib/python3.7/runpy.py", line 193, in _run_module_as_main "main", mod_spec) File "/media/new_data4/.conda/envs/torch171/lib/python3.7/runpy.py", line 85, in _run_code exec(code, run_globals) File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/main.py", line 39, in cli.main() File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main run() File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file runpy.run_path(target, run_name="main") File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 322, in run_path pkg_name=pkg_name, script_name=fname) File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 136, in _run_module_code mod_name, mod_spec, pkg_name, script_name) File "/media/new_data4/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code exec(code, run_globals) File "/media/new_data4/csn_work/YOLOPR1/tools/prune.py", line 180, in prune(cfg,opt) File "/media/new_data4/csn_work/YOLOPR1/tools/prune.py", line 142, in prune w1 = w1[idx1_1.tolist(), :, :, :].clone() # 根据筛选出的特征图的id,拷贝需要的权重 RuntimeError: CUDA error: device-side assert triggered

Eric-mingjie / network-slimming

RuntimeError: CUDA error: device-side assert triggered #88