# utils.py
def cal_auc_pauc(y_true, y_pred, domain_list, max_fpr=0.1):
    """Return the source-domain AUC, target-domain AUC and overall partial AUC.

    Each per-domain subset holds every sample whose ``domain_list`` entry
    equals that domain name, plus every sample whose label equals 1
    regardless of its domain. The partial AUC is computed over all samples.

    NOTE(review): label 1 presumably marks anomalous samples, so both
    per-domain subsets share the label-1 samples of the two domains. This
    looks intentional -- confirm against the evaluation protocol in use.

    Args:
        y_true: Indexable sequence of ground-truth labels.
        y_pred: Indexable sequence of scores, aligned with ``y_true``.
        domain_list: Indexable sequence of domain tags ("source" or
            "target"), aligned with ``y_true``.
        max_fpr: Forwarded to ``roc_auc_score`` for the partial AUC.

    Returns:
        Tuple ``(auc_s, auc_t, p_auc)``.
    """
    sample_count = len(y_true)

    def _subset(domain):
        # Positions kept for this domain: tagged with `domain`, or labelled 1.
        picked = [
            pos
            for pos in range(sample_count)
            if domain_list[pos] == domain or y_true[pos] == 1
        ]
        labels = [y_true[pos] for pos in picked]
        scores = [y_pred[pos] for pos in picked]
        return labels, scores

    source_labels, source_scores = _subset("source")
    target_labels, target_scores = _subset("target")

    score_fn = sklearn.metrics.roc_auc_score
    auc_s = score_fn(source_labels, source_scores)
    auc_t = score_fn(target_labels, target_scores)
    # The partial AUC uses the full, unfiltered sample set.
    p_auc = score_fn(y_true, y_pred, max_fpr=max_fpr)
    return auc_s, auc_t, p_auc
I would expect y_true_s to contain only source-domain data, but in the code above it also contains the anomalous target-domain data, and the other variables (y_pred_s, y_true_t, y_pred_t) raise the same question.
I think this will make the anomalous and normal data imbalanced.
Is there a reason for this?
Thanks.
I would expect `y_true_s` to contain only source-domain data, but in the code above it also contains the anomalous target-domain data, and the other variables raise the same question. I think this will make the anomalous and normal data imbalanced. Is there a reason for this? Thanks.