Hi,
I have tested sed_scores_eval on the teacher scores you provided as follows:
from pathlib import Path
import numpy as np
from sed_scores_eval import intersection_based

scores_dir = Path("/path/to/your/provided/scores_dir")
psds, psd_roc, single_class_psd_rocs = intersection_based.psds(
scores=scores_dir/"psds_teacher"/"scores",
ground_truth=scores_dir/"public.tsv",
audio_durations=scores_dir/"public_duration.tsv",
dtc_threshold=.7,
gtc_threshold=.7,
cttc_threshold=None,
alpha_ct=0,
alpha_st=1.,
unit_of_time='hour',
max_efpr=100.,
num_jobs=3,
time_decimals=6,
)
print(psds)
which gives a psds of 0.5054, as in your screenshot.
Running the reference implementation based on psds_eval (which is used for testing) as follows
(
psds_ref, psd_roc_ref, single_class_psd_rocs_ref
) = intersection_based.reference.approximate_psds(
scores=scores_dir/"psds_teacher"/"scores",
ground_truth=scores_dir/"public.tsv",
audio_durations=scores_dir/"public_duration.tsv",
thresholds=np.linspace(0.01, 0.99, 50),
dtc_threshold=.7,
gtc_threshold=.7,
cttc_threshold=None,
alpha_ct=0,
alpha_st=1.,
unit_of_time='hour',
max_efpr=100.,
)
print(psds_ref)
gives a psds_ref of 0.4956, which doesn't match your reported value.
I am wondering if the reason for the difference might be in the way you perform the decoding before computing PSDS from operating points. In your code (which, by the way, I cannot execute) I see that you use decode_pred_batch to obtain the operating points for compute_psds_from_operating_points, whereas the scores for the compute_psds_from_scores call come from batched_decode_preds.
Can you maybe provide a function (including its dependencies, so I can execute it) where both of your reported results are generated from the same scores that you provided?
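For reference, here is a minimal sketch of what I mean. It assumes only the layout of sed_scores_eval score dataframes (onset and offset columns followed by one column per event class); the helper name detections_from_scores is hypothetical and not part of any library. It derives the detections for the operating points from the very same score dataframes that the score-based evaluation consumes, so both metrics see identical post-processing:

import pandas as pd

def detections_from_scores(scores_df, threshold, event_classes):
    """Hypothetical helper: binarize one score dataframe at a single
    threshold and merge adjacent active frames into events."""
    events = []
    for cls in event_classes:
        active = scores_df[cls].to_numpy() > threshold
        onset = None
        for i, is_active in enumerate(active):
            if is_active and onset is None:
                onset = scores_df["onset"].iloc[i]  # event starts here
            elif not is_active and onset is not None:
                # event ends at the start of the first inactive frame
                events.append((cls, onset, scores_df["onset"].iloc[i]))
                onset = None
        if onset is not None:  # event runs until the end of the clip
            events.append((cls, onset, scores_df["offset"].iloc[-1]))
    return pd.DataFrame(events, columns=["event_label", "onset", "offset"])

With detections produced this way per threshold, any remaining difference between the two PSDS values would come from the metric computation itself rather than from the decoding.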
I use the same code as the DCASE baseline and the FDY-SED GitHub repository (https://github.com/frednam93/FDY-SED). These are the functions used:
import copy
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.ndimage
from sed_scores_eval.utils.scores import create_score_dataframe

def batched_decode_preds(strong_preds, weak_preds, filenames, encoder, thresholds, median_filter, decode_weak, pad_indx=None):
    # Init a dataframe per threshold
    scores_raw = {}
    scores_postprocessed = {}
    prediction_dfs = {}
    for threshold in thresholds:
        prediction_dfs[threshold] = pd.DataFrame()
    for j in range(strong_preds.shape[0]):  # over batch
        audio_id = Path(filenames[j]).stem
        filename = audio_id + ".wav"
        c_scores = strong_preds[j]  # [n_class, frames]
        if pad_indx is not None:
            # drop padded frames; slice the frame axis, not the class axis
            true_len = int(c_scores.shape[-1] * pad_indx[j].item())
            c_scores = c_scores[..., :true_len]
        c_scores = c_scores.transpose(0, 1).detach().cpu().numpy()  # [frames, n_class]
        c_scores_org = copy.deepcopy(c_scores)
        scores_raw[audio_id] = create_score_dataframe(
            scores=c_scores,
            timestamps=encoder._frame_to_time(np.arange(len(c_scores) + 1)),
            event_classes=encoder.labels,
        )
        # median-filter the soft scores per class
        for mf_idx in range(len(median_filter)):
            c_scores[:, mf_idx] = scipy.ndimage.median_filter(c_scores[:, mf_idx], (median_filter[mf_idx]))
        scores_postprocessed[audio_id] = create_score_dataframe(
            scores=c_scores,
            timestamps=encoder._frame_to_time(np.arange(len(c_scores) + 1)),
            event_classes=encoder.labels,
        )
        for c_th in thresholds:
            # work on a fresh copy per threshold so one threshold's masking
            # does not leak into the next
            c_preds = copy.deepcopy(c_scores_org)
            if decode_weak:  # if decode_weak = 1 or 2
                for class_idx in range(weak_preds.size(1)):
                    if weak_preds[j, class_idx] < c_th:
                        c_preds[:, class_idx] = 0
                    elif decode_weak > 1:  # use only weak predictions (weakSED)
                        c_preds[:, class_idx] = 1
            if decode_weak < 2:  # weak prediction masking
                c_preds = c_preds > c_th
            # median-filter the binarized predictions per class
            for mf_idx in range(len(median_filter)):
                c_preds[:, mf_idx] = scipy.ndimage.median_filter(c_preds[:, mf_idx], (median_filter[mf_idx]))
            pred = encoder.decode_strong(c_preds)
            pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
            pred["filename"] = filename
            prediction_dfs[c_th] = pd.concat([prediction_dfs[c_th], pred], ignore_index=True)
    return scores_raw, scores_postprocessed, prediction_dfs
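As a side note on how I would wire the returned dicts up: a sketch, assuming the per-batch scores_postprocessed dicts have been merged over the whole evaluation set into a single dict all_scores (hypothetical name, mapping audio_id to score dataframe), which, if I read the sed_scores_eval API correctly, intersection_based.psds accepts in place of a scores directory:

from sed_scores_eval import intersection_based

# all_scores: union of the scores_postprocessed dicts over all eval batches
psds1_2023, _, _ = intersection_based.psds(
    scores=all_scores,  # dict: audio_id -> score dataframe
    ground_truth=scores_dir / "public.tsv",
    audio_durations=scores_dir / "public_duration.tsv",
    dtc_threshold=0.7,
    gtc_threshold=0.7,
    cttc_threshold=None,
    alpha_ct=0,
    alpha_st=1.,
    unit_of_time='hour',
    max_efpr=100.,
)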
def decode_pred_batch(outputs, weak_preds, filenames, encoder, thresholds, median_filter, decode_weak, pad_idx=None):
    pred_dfs = {}
    for threshold in thresholds:
        pred_dfs[threshold] = pd.DataFrame()
    for batch_idx in range(outputs.shape[0]):  # outputs size = [bs, n_class, frames]
        for c_th in thresholds:
            output = outputs[batch_idx]  # output size = [n_class, frames]
            if pad_idx is not None:
                # drop padded frames; slice the frame axis, not the class axis
                true_len = int(output.shape[-1] * pad_idx[batch_idx].item())
                output = output[..., :true_len]
            output = output.transpose(0, 1).detach().cpu().numpy()  # output size = [frames, n_class]
            if decode_weak:  # if decode_weak = 1 or 2
                for class_idx in range(weak_preds.size(1)):
                    if weak_preds[batch_idx, class_idx] < c_th:
                        output[:, class_idx] = 0
                    elif decode_weak > 1:  # use only weak predictions (weakSED)
                        output[:, class_idx] = 1
            if decode_weak < 2:  # weak prediction masking
                output = output > c_th
            for mf_idx in range(len(median_filter)):
                output[:, mf_idx] = scipy.ndimage.median_filter(output[:, mf_idx], (median_filter[mf_idx]))
            pred = encoder.decode_strong(output)
            pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
            pred["filename"] = Path(filenames[batch_idx]).stem + ".wav"
            pred_dfs[c_th] = pd.concat([pred_dfs[c_th], pred], ignore_index=True)
    return pred_dfs
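And the 2022-style counterpart for the operating points: a sketch assuming psds_eval's documented PSDSEval API, that pred_dfs has likewise been accumulated over all evaluation batches (one detection dataframe per threshold), and the same scores_dir files as above:

import pandas as pd
from psds_eval import PSDSEval

gt_df = pd.read_csv(scores_dir / "public.tsv", sep="\t")
durations_df = pd.read_csv(scores_dir / "public_duration.tsv", sep="\t")
psds_eval_obj = PSDSEval(
    dtc_threshold=0.7,
    gtc_threshold=0.7,
    cttc_threshold=0.3,  # has no effect on the value since alpha_ct=0 below
    ground_truth=gt_df,
    metadata=durations_df,
)
for c_th in sorted(pred_dfs.keys()):
    # one operating point per decision threshold
    psds_eval_obj.add_operating_point(pred_dfs[c_th])
psds1_2022 = psds_eval_obj.psds(alpha_ct=0., alpha_st=1., max_efpr=100.).value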
Hi,
so if you use different methods for post-processing, you cannot expect the results to be comparable. If, however, you think the two functions are doing the same post-processing, then please provide a minimal working example where scores with the same post-processing give a psds1_2023 that is lower than psds1_2022. Otherwise I cannot tell whether there really is an issue here.
Hello.
Problem: although this library was used, for certain models the resulting performance is lower than the existing psds score. Please refer to the picture below for the numbers. The code and materials for reproducing them are shared via the links below.
code for evaluation
scores: https://drive.google.com/file/d/1i_OPaFSQKnZZH6kQZr5cqxTdNsapn0mh/view?usp=share_link