SegmentationBLWX / sssegmentation

SSSegmentation: An Open Source Supervised Semantic Segmentation Toolbox Based on PyTorch.
https://sssegmentation.readthedocs.io/en/latest/
Apache License 2.0
799 stars 107 forks source link

[BUG] Simple Trial of FCN on VOC #58

Closed zhaoyurui closed 1 month ago

zhaoyurui commented 1 month ago

Windows 10

Question description (问题描述) 用FCN在VOC上进行训练简单脚本如下 【训练部分不完整】: import torch import numpy as np import matplotlib.pyplot as plt from PIL import Image import copy

import torch.distributed as dist import datetime import os import xml.dom.minidom as xmldom

from ssseg.configs.base import DATASET_CFG_VOCAUG_512x512, DATASET_CFG_COCOStuff10k_640x640 from ssseg.configs.base import DATALOADER_CFG_BS16,DATALOADER_CFG_BS8 from ssseg.configs.fcn.base_cfg import SEGMENTOR_CFG from ssseg.modules.models.segmentors.fcn import FCN

from ssseg.modules.datasets import BuildDataset

from ssseg.modules.datasets import DatasetBuilder # 数据集构建器

from ssseg.modules.datasets import BuildDataTransform

from ssseg.modules.datasets import DataTransformBuilder # 数据增强构建器

from ssseg.modules.parallel import BuildDistributedDataloader

# initialize environment
# Enter a bfloat16 autocast context for the rest of the process; it is never exited.
# The context-manager hook is the dunder `__enter__` -- `torch.autocast` has no plain
# `enter` method, so the previous spelling raised AttributeError.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
# Only query the GPU when one exists, matching the CPU fallback used for `device`.
# A compute-capability major version >= 8 enables the TF32 matmul / cuDNN paths.
if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

import logging

# Rendezvous address and port read by the `env://` init method used below.
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '5678'

os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo"
# NOTE(review): CUDA_VISIBLE_DEVICES is only honoured when it is set before CUDA is
# initialised. The earlier torch.cuda.get_device_properties(0) call appears to
# initialise CUDA first, and `device` later selects 'cuda:0' -- confirm which GPU is
# actually intended before relying on this line.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
torch.cuda.device_count()
torch.cuda.empty_cache()

# Single-process group (rank 0 of 1) so the distributed dataloader can be built.
dist.init_process_group(backend='gloo', init_method='env://', rank=0, world_size=1)

# Alternative NCCL initialisation, kept for reference only. It must not run as well:
# the default process group can only be initialised once, and the NCCL backend is
# not available on Windows (the reported platform).
# dist.init_process_group(backend='nccl', init_method=None,
#                         timeout=datetime.timedelta(seconds=1800),
#                         world_size=-1, rank=-1, store=None, group_name='default')

# NOTE(review): this binds a Python variable named CUDA_LAUNCH_BLOCKING; it does not
# set the CUDA_LAUNCH_BLOCKING environment variable. Presumably
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1', executed before the first CUDA call,
# was intended -- confirm.
CUDA_LAUNCH_BLOCKING = 1

# Use the first visible GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# deepcopy
# Work on a private copy so the edits below do not leak into the imported config.
SEGMENTOR_CFG = copy.deepcopy(SEGMENTOR_CFG)

# modify dataset config
SEGMENTOR_CFG['dataset'] = DATASET_CFG_VOCAUG_512x512.copy()
SEGMENTOR_CFG['dataloader'] = DATALOADER_CFG_BS8.copy()

# modify scheduler config
SEGMENTOR_CFG['scheduler']['max_epochs'] = 60

# modify other segmentor configs
# Directory holding the VOC2012 annotation XML files.
Anno_Path = r"E:/Study/1Mask2Former/sssegmentation-main/sssegmentation-main-code01/VOCdevkit/VOC2012/Annotations"

def getClassNum(Anno_Path):
    """Count the distinct object category names in a folder of VOC-style XML files.

    Every ``*.xml`` file directly inside ``Anno_Path`` is parsed, and the text of the
    first ``<name>`` element found under each ``<object>`` element is collected.

    Args:
        Anno_Path: Directory containing the annotation XML files.

    Returns:
        The number of distinct category names found (0 for an empty directory).

    Raises:
        OSError: If ``Anno_Path`` cannot be listed or a file cannot be read.
        xml.parsers.expat.ExpatError: If an ``.xml`` file is not well-formed.
    """
    # A set gives O(1) de-duplication instead of scanning a list for every object.
    labels = set()
    for file_name in os.listdir(Anno_Path):
        # Skip stray non-annotation entries rather than failing while parsing them.
        if not file_name.lower().endswith(".xml"):
            continue
        domobj = xmldom.parse(os.path.join(Anno_Path, file_name))
        # Get the document's root element.
        elementobj = domobj.documentElement
        # Get the <object> child tags.
        for object_node in elementobj.getElementsByTagName("object"):
            name_nodes = object_node.getElementsByTagName("name")
            # An <object> without a <name>, or with an empty one, carries no label.
            if not name_nodes or name_nodes[0].firstChild is None:
                continue
            labels.add(name_nodes[0].firstChild.data)
    return len(labels)

class DataMeta(object):
    """Plain container bundling one batch's image, target and metadata.

    Instances are built from a dataloader batch and passed to the model call.
    """

    # The constructor must be the dunder `__init__`: a method merely called `init`
    # is never invoked by `DataMeta(...)`, so positional arguments raise TypeError.
    def __init__(self, image, seg_target, width, height, id):
        """Store the batch fields.

        Args:
            image: Input image data, stored as ``self.images``.
            seg_target: Segmentation ground truth for the batch.
            width: Width value(s) supplied with the batch.
            height: Height value(s) supplied with the batch.
            id: Sample identifier(s); the name shadows the builtin but is kept so
                existing keyword callers keep working.
        """
        self.images = image
        self.seg_target = seg_target
        self.width = width
        self.height = height
        self.id = id

    def gettargets(self):
        """Return the targets as a dict with the single key ``'seg_targets'``."""
        return {'seg_targets': self.seg_target}

# Pascal VOC segmentation masks use 21 labels: background (index 0) plus the 20
# object categories. The XML annotations only name the 20 object categories, so
# counting them (or hard-coding 20) leaves label 20 out of range for a 20-way head.
# NOTE(review): 21 is believed to match the toolbox's stock VOC configs -- confirm.
SEGMENTOR_CFG['num_classes'] = 21

SEGMENTOR_CFG['backbone'] = {
    'type': 'ResNet',
    'depth': 50,
    'structure_type': 'resnet50conv3x3stem',
    'pretrained': True,
    'outstride': 16,
    'use_conv3x3_stem': True,
    'selected_indices': (2, 3),
}
SEGMENTOR_CFG['work_dir'] = 'fcn_resnet50os16_voc'
SEGMENTOR_CFG['logfilepath'] = 'fcn_resnet50os16_voc/fcn_resnet50os16_voc.log'
SEGMENTOR_CFG['resultsavepath'] = 'fcn_resnet50os16_voc/fcn_resnet50os16_voc_results.pkl'

# Set up the logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# `__name__` is the module's own name; the bare identifier `name` is undefined here
# and raised NameError.
logger_handle = logging.getLogger(__name__)

# Build the training dataset from the dataset section of the segmentor config.
train_dataset = BuildDataset(mode='TRAIN', logger_handle=logger_handle, dataset_cfg=SEGMENTOR_CFG['dataset'])
# Separate shallow copy of the dataloader preset, trimmed below before use.
DataLoader = DATALOADER_CFG_BS8.copy()

model = FCN(SEGMENTOR_CFG, mode='TRAIN_DEVELOP')
# Mode : 'TRAIN','TRAIN_DEVELOP'
model.train()
model.to(device)

DataLoader['shuffle'] = False
# Drop these keys from the copy when present, before it is passed to the builder.
for unused_key in (
    'expected_total_train_bs_for_assert',
    'auto_adapt_to_expected_train_bs',
    'train',
    'test',
):
    DataLoader.pop(unused_key, None)

train_dataloader = BuildDistributedDataloader(train_dataset, DataLoader)

for batch, data in enumerate(train_dataloader):
    # Move the tensors to the target device and wrap the batch for the model call.
    data_meta = DataMeta(data['image'].to(device), data['seg_target'].to(device), data['width'], data['height'], data['id'])
    ssseg_outputs = model(data_meta)
    # NOTE(review): the original indentation was lost; this print is assumed to run
    # once per iteration -- confirm.
    print("Iter=====Ending=======")

Screenshot (报错截图) image

CharlesPikachu commented 1 month ago

Try setting the environment variable CUDA_LAUNCH_BLOCKING=1 to see more details.

CharlesPikachu commented 1 month ago

Closed since this has been solved.