cxliu0 / PET

[ICCV 2023] Point-Query Quadtree for Crowd Counting, Localization, and More
MIT License

Error when running on custom dataset with .txt annotations #26

Closed tejasri19 closed 5 months ago

tejasri19 commented 5 months ago

@cxliu0, I'm trying to reproduce PET on my custom data with a `.txt` annotation format and am facing the errors shown below. Can you please share how to resolve this issue?

```
Traceback (most recent call last):
  File "main.py", line 257, in <module>
    main(args)
  File "main.py", line 190, in main
    train_stats = train_one_epoch(
  File "/home/tejasri/P2P/PET_paddy/engine.py", line 97, in train_one_epoch
    outputs = model(samples, epoch=epoch, train=True,
  File "/home/tejasri/anaconda3/envs/new/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/tejasri/P2P/PET_paddy/models/pet.py", line 302, in forward
    out = self.train_forward(samples, features, pos, **kwargs)
  File "/home/tejasri/P2P/PET_paddy/models/pet.py", line 353, in train_forward
    losses = self.compute_loss(outputs, criterion, targets, epoch, samples)
  File "/home/tejasri/P2P/PET_paddy/models/pet.py", line 255, in compute_loss
    den = torch.tensor([target['density'] for target in targets])  # crowd density
  File "/home/tejasri/P2P/PET_paddy/models/pet.py", line 255, in <listcomp>
    den = torch.tensor([target['density'] for target in targets])  # crowd density
KeyError: 'density'
```

cxliu0 commented 5 months ago

Please refer to SHA.py, line 92, for the density computation, which is necessary during training. I suspect you deleted that line of code.
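
For reference, that line computes the crowd density as the mean nearest-neighbor distance between ground-truth points. A minimal standalone sketch of what it does, assuming `points` is an N×2 float tensor (it mirrors the `compute_density` method quoted in full below):

```python
import torch

def compute_density(points: torch.Tensor) -> torch.Tensor:
    """Crowd density: average distance from each GT point to its nearest neighbor."""
    if points.shape[0] > 1:
        dist = torch.cdist(points, points, p=2)  # N x N pairwise distances
        # after sorting each row, column 0 is the self-distance (0);
        # column 1 is the distance to the nearest neighbor
        return dist.sort(dim=1)[0][:, 1].mean().reshape(-1)
    # with 0 or 1 points, fall back to a large constant
    return torch.tensor(999.0).reshape(-1)
```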

tejasri19 commented 5 months ago

@cxliu0, thanks for your quick response. I modified SHA.py accordingly but received the following error:

```
Outputs: {'loss_dict': {'loss_ce_sp': tensor(1.1668, device='cuda:0', grad_fn=<...>),
                        'loss_points_sp': tensor(0., device='cuda:0', grad_fn=<...>),
                        'loss_ce_ds': tensor(nan, device='cuda:0', grad_fn=<...>),
                        'loss_points_ds': tensor(0., device='cuda:0', grad_fn=<...>),
                        'loss_split': tensor(0.5733, device='cuda:0', grad_fn=<...>)},
          'weight_dict': {'loss_ce_sp': 1.0, 'loss_points_sp': 5.0, 'loss_ce_ds': 1.0,
                          'loss_points_ds': 5.0, 'loss_split': 0.0},
          'losses': tensor(nan, device='cuda:0', grad_fn=<...>)}
Loss is nan, stopping training
{'loss_ce_sp': tensor(1.1668, device='cuda:0', grad_fn=<...>), 'loss_points_sp': tensor(0., device='cuda:0', grad_fn=<...>), 'loss_ce_ds': tensor(nan, device='cuda:0', grad_fn=<...>), 'loss_points_ds': tensor(0., device='cuda:0', grad_fn=<...>), 'loss_split': tensor(0.5733, device='cuda:0', grad_fn=<...>)}
```

Is it due to NaN values in the input data?
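
(As a quick way to rule out bad inputs, here is a small sanity check one could run over loaded samples; a sketch only, using the key names from the dataset class below:)

```python
import torch

def check_sample(img, targets):
    """Scan one loaded sample for NaN/Inf before it reaches the model."""
    assert torch.isfinite(img).all(), "image contains NaN/Inf pixels"
    for t in targets:
        assert torch.isfinite(t['point']).all(), "point coordinates contain NaN/Inf"
        if 'density' in t:
            assert torch.isfinite(t['density']).all(), "density is NaN/Inf"
```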

Please check the modified SHA.py below:

```python
## sha.py
import os
import random
import scipy

from scipy import spatial
import networkx as nx

import torch
import numpy as np
from torch.utils.data import Dataset
from PIL import Image
import cv2
import glob

import scipy.io as io
from matplotlib import pyplot as plt
plt.switch_backend('agg')


class SHHA(Dataset):
    def __init__(self, data_root, transform=None, train=False, patch=False, flip=False):
        self.root_path = data_root
        self.train_lists = os.path.join(self.root_path, "train.txt")
        self.eval_list = os.path.join(self.root_path, "test.txt")

        # there may exist multiple list files
        if train:
            self.img_list_file = [name.split(',') for name in open(self.train_lists).read().splitlines()]
        else:
            self.img_list_file = [name.split(',') for name in open(self.eval_list).read().splitlines()]

        self.img_list = self.img_list_file
        self.nSamples = len(self.img_list)

        self.transform = transform
        self.train = train
        self.patch = patch
        self.flip = flip

    def compute_density(self, points):
        """
        Compute crowd density:
            - defined as the average nearest distance between ground-truth points
        """
        points_tensor = points.clone()
        dist = torch.cdist(points_tensor, points_tensor, p=2)
        if points_tensor.shape[0] > 1:
            density = dist.sort(dim=1)[0][:, 1].mean().reshape(-1)
        else:
            density = torch.tensor(999.0).reshape(-1)
        return density

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        assert index <= len(self), 'index range error'

        img_path = self.img_list[index][0]
        gt_path = self.img_list[index][1]
        img, point = load_data((img_path, gt_path), self.train)

        if self.transform is not None:
            img = self.transform(img)

        if self.train:
            # data augmentation -> random scale
            scale_range = [0.5, 1.4]
            min_size = min(img.shape[1:])
            scale = random.uniform(*scale_range)
            # scale the image and points
            if scale * min_size > 224:
                img = torch.nn.functional.upsample_bilinear(img.unsqueeze(0), scale_factor=scale).squeeze(0)
                point *= scale
        # random crop augmentation
        if self.train and self.patch:
            img, point = random_crop(img, point)
            for i, _ in enumerate(point):
                point[i] = torch.Tensor(point[i])
        # random flipping
        if random.random() > 0.1 and self.train and self.flip:
            img = torch.Tensor(img[:, :, :, ::-1].copy())
            for i, _ in enumerate(point):
                point[i][:, 0] = 224 - point[i][:, 0]
        # random brightness change
        if random.random() > 0.3 and self.train:
            img = (torch.Tensor(img).clone()) * random.uniform(8, 12) / 10
            for i, _ in enumerate(point):
                point[i][:, 0] = point[i][:, 0]  # points unchanged

        if not self.train:
            point = [point]

        img = torch.Tensor(img)
        # need to adapt the image-id parsing below to your own image names
        target = [{} for i in range(len(point))]

        for i, _ in enumerate(point):
            target[i]['point'] = torch.Tensor(point[i])

            # image_id_1 = int(img_path.split('/')[-1].split('.')[0].split("_")[1][4:8])
            image_id_1 = int(img_path.split('/')[-1].split('.')[0].split("_")[1])
            image_id_1 = torch.Tensor([image_id_1]).long()
            image_id_2 = int(img_path.split('/')[-1].split('.')[0].split("_")[1])
            image_id_2 = torch.Tensor([image_id_2]).long()
            target[i]['image_id_1'] = image_id_1
            target[i]['image_id_2'] = image_id_2
            target[i]['labels'] = torch.ones([point[i].shape[0]]).long()

            if self.train:
                density = self.compute_density(point[i])
                target[i]['density'] = density

        return img, target


def load_data(img_gt_path, train):
    img_path, gt_path = img_gt_path
    # load the image
    img = cv2.imread(img_path)
    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    # load ground-truth points ("x y" per line)
    points = []
    pts = open(gt_path).read().splitlines()
    for pt_0 in pts:
        pt = pt_0.split(" ")
        x = float(pt[0])
        y = float(pt[1])
        points.append([x, y])
    return img, np.array(points)


# random crop augmentation
def random_crop(img, den, num_patch=10):
    half_h = 224
    half_w = 224
    result_img = np.zeros([num_patch, img.shape[0], half_h, half_w])
    result_den = []

    for i in range(num_patch):
        start_h = random.randint(0, img.size(1) - half_h)
        start_w = random.randint(0, img.size(2) - half_w)
        end_h = start_h + half_h
        end_w = start_w + half_w
        result_img[i] = img[:, start_h:end_h, start_w:end_w]
        # copy the points that fall inside the crop
        idx = (den[:, 0] >= start_w) & (den[:, 0] <= end_w) & (den[:, 1] >= start_h) & (den[:, 1] <= end_h)
        record_den = den[idx]
        record_den[:, 0] -= start_w
        record_den[:, 1] -= start_h

        result_den.append(record_den)

    return result_img, result_den
```
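
For reference, the list files read by this class are plain text with one comma-separated image/annotation pair per line, and each annotation file holds one `x y` point per line (inferred from the `split(',')` and `split(" ")` calls above); the paths here are illustrative:

```
# train.txt
images/IMG_1.jpg,gts/IMG_1.txt
images/IMG_2.jpg,gts/IMG_2.txt

# gts/IMG_1.txt  (one "x y" point per line)
123.0 45.5
678.2 90.1
```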

cxliu0 commented 5 months ago

By default, this function ensures that the image size is divisible by 256. It appears that you are using an image size of 224x224. This will introduce invalid image pixels, leading to a NaN loss.

To solve this problem, you can change the block size and patch size to 224.
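
To make such a mismatch fail fast, a small assertion along these lines could be added before the forward pass (a sketch only; the `block_size` name is illustrative):

```python
def assert_block_divisible(img, block_size=256):
    """PET expects H and W to be divisible by its block size (256 by default).
    Feeding 224x224 crops forces padding with invalid pixels, which can yield
    NaN losses; either crop at a multiple of the block size, or set the
    block/patch size to 224."""
    _, h, w = img.shape
    assert h % block_size == 0 and w % block_size == 0, \
        f"image size {h}x{w} is not divisible by block_size={block_size}"
```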

tejasri19 commented 5 months ago

Got it. Thank you.

However, I now face the following error:

```
Epoch: [5] Total time: 0:03:17 (0.2372 s / it)
Averaged stats: lr: 0.000100 loss: 0.0549 (0.0726) loss_ce_sp: 0.0469 (0.0613)
  loss_points_sp: 0.0008 (0.0008) loss_ce_ds: 0.0070 (0.0098) loss_points_ds: 0.0001 (0.0001)
  loss_split: 0.0000 (0.0006) loss_ce_sp_unscaled: 0.0469 (0.0613)
  loss_points_sp_unscaled: 0.0002 (0.0002) loss_ce_ds_unscaled: 0.0070 (0.0098)
  loss_points_ds_unscaled: 0.0000 (0.0000) loss_split_unscaled: 0.0003 (0.0056)
[ep 5][lr 0.0001000][197.16s]
Traceback (most recent call last):
  File "main.py", line 257, in <module>
    main(args)
  File "main.py", line 227, in main
    test_stats = evaluate(model, data_loader_val, device, epoch, None)
  File "/home/tejasri/anaconda3/envs/new/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/tejasri/P2P/PET_paddy/engine.py", line 161, in evaluate
    gt_cnt = targets[0]['points'].shape[0]
KeyError: 'points'
```

cxliu0 commented 5 months ago

Please follow SHA.py to customize your dataset. Do not directly delete variables defined in SHA.py.
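
For context, this KeyError comes from the evaluation path: engine.py (line 161 in the traceback) reads `targets[0]['points']`, while the modified `__getitem__` above only populates a `'point'` key. A sketch of a target dict exposing the annotation under both key names, with the names taken from the tracebacks (check the original SHA.py for the authoritative set of keys):

```python
import torch

def build_target(points):
    """Build one target dict that both training and evaluation can read."""
    pts = torch.as_tensor(points, dtype=torch.float32)
    return {
        'point': pts,    # key used in the modified __getitem__ above
        'points': pts,   # key read by evaluate() in engine.py
        'labels': torch.ones(pts.shape[0], dtype=torch.long),
    }
```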

tejasri19 commented 5 months ago

Resolved. Thank you.