RetroCirce / HTS-Audio-Transformer

The official code repo of "HTS-AT: A Hierarchical Token-Semantic Audio Transformer for Sound Classification and Detection"
https://arxiv.org/abs/2202.00874
MIT License
341 stars 62 forks source link

RuntimeError: Default process group has not been initialized, please make sure to call init_process_group. #61

Open zhiziwy opened 1 month ago

zhiziwy commented 1 month ago

RuntimeError: Default process group has not been initialized, please make sure to call init_process_group.

wthu12138 commented 1 month ago

Hi, I have the same problem. Did you solve it?

ACitronella commented 1 month ago

Hi, I ran into this problem as well while testing on ESC-50. My machine has only one GPU, so the command I used is:

CUDA_VISIBLE_DEVICES=0 python main.py test

then, I found the same RuntimeError

Looking back at commit 8e1f216, where the author added single-GPU support, several lines were added to sed_model.py so that validation_epoch_end checks the number of devices. However, the author didn't apply the same change to test_epoch_end, so I fixed it by copying the change from validation_epoch_end to test_epoch_end.

You can replace test_epoch_end in sed_model.py with the code below

def test_epoch_end(self, test_step_outputs):
    self.device_type = next(self.parameters()).device
    if self.config.fl_local:
        pred = np.concatenate([d[0] for d in test_step_outputs], axis = 0)
        pred_map = np.concatenate([d[1] for d in test_step_outputs], axis = 0)
        audio_name = np.concatenate([d[2] for d in test_step_outputs], axis = 0)
        real_len = np.concatenate([d[3] for d in test_step_outputs], axis = 0)
        heatmap_file = os.path.join(self.config.heatmap_dir, self.config.test_file + "_" + str(self.device_type) + ".npy")
        save_npy = [
            {
                "audio_name": audio_name[i],
                "heatmap": pred_map[i],
                "pred": pred[i],
                "real_len":real_len[i]
            }
            for i in range(len(pred))
        ]
        np.save(heatmap_file, save_npy)
    else:
        self.device_type = next(self.parameters()).device
        pred = torch.cat([d[0] for d in test_step_outputs], dim = 0)
        target = torch.cat([d[1] for d in test_step_outputs], dim = 0)

        if torch.cuda.device_count() > 1:
            gather_pred = [torch.zeros_like(pred) for _ in range(dist.get_world_size())]
            gather_target = [torch.zeros_like(target) for _ in range(dist.get_world_size())]
            dist.barrier()

        if self.config.dataset_type == "audioset":
            metric_dict = {
            "mAP": 0.,
            "mAUC": 0.,
            "dprime": 0.
            }
        else:
            metric_dict = {
                "acc":0.
            }
        if torch.cuda.device_count() > 1:
            dist.all_gather(gather_pred, pred)
            dist.all_gather(gather_target, target)
            if dist.get_rank() == 0:
                gather_pred = torch.cat(gather_pred, dim = 0).cpu().numpy()
                gather_target = torch.cat(gather_target, dim = 0).cpu().numpy()
                if self.config.dataset_type == "scv2":
                    gather_target = np.argmax(gather_target, 1)
                metric_dict = self.evaluate_metric(gather_pred, gather_target)
                print(self.device_type, dist.get_world_size(), metric_dict, flush = True)
            if self.config.dataset_type == "audioset":
                self.log("mAP", metric_dict["mAP"] * float(dist.get_world_size()), on_epoch = True, prog_bar=True, sync_dist=True)
                self.log("mAUC", metric_dict["mAUC"] * float(dist.get_world_size()), on_epoch = True, prog_bar=True, sync_dist=True)
                self.log("dprime", metric_dict["dprime"] * float(dist.get_world_size()), on_epoch = True, prog_bar=True, sync_dist=True)
            else:
                self.log("acc", metric_dict["acc"] * float(dist.get_world_size()), on_epoch = True, prog_bar=True, sync_dist=True)
            dist.barrier()
        else:
            gather_pred = pred.cpu().numpy()
            gather_target = target.cpu().numpy()
            if self.config.dataset_type == "scv2":
                gather_target = np.argmax(gather_target, 1)
            metric_dict = self.evaluate_metric(gather_pred, gather_target)
            print(self.device_type, metric_dict, flush = True)

            if self.config.dataset_type == "audioset":
                self.log("mAP", metric_dict["mAP"], on_epoch = True, prog_bar=True, sync_dist=False)
                self.log("mAUC", metric_dict["mAUC"], on_epoch = True, prog_bar=True, sync_dist=False)
                self.log("dprime", metric_dict["dprime"], on_epoch = True, prog_bar=True, sync_dist=False)
            else:
                self.log("acc", metric_dict["acc"], on_epoch = True, prog_bar=True, sync_dist=False)