erprogs / GenConViT

Deepfake Video Detection Using Generative Convolutional Vision Transformer
GNU General Public License v3.0
51 stars 11 forks source link

How to test the trained model's auc. #9

Open wzk101 opened 8 months ago

erprogs commented 8 months ago

Hello @wzk101,

I've uploaded the results of our model detection to the result folder. Below is the code I've used to compute the AUC. If you wish to compute the AUC for your own detection, you can use the code I've provided below. Enjoy!

import os
import json
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, f1_score

# Per-dataset detection results produced by the model (one JSON file per dataset).
json_files = [
    os.path.join("result", "data_april14_Celeb-DF.json"),
    os.path.join("result", "data_april14_DFDC.json"),
    os.path.join("result", "data_april11_DeepfakeTIMIT.json"),
    os.path.join("result", "data_april14_FF++.json"),
]

# ROC curve data collected per dataset for the combined plot at the end.
fpr_list = []
tpr_list = []
roc_auc_list = []

for json_file in json_files:
    with open(json_file, "r") as f:
        result = json.load(f)

    # Ground-truth labels, predicted fake-probabilities, and predicted labels
    # for each video, as stored by the detection run.
    actual_labels = result["video"]["correct_label"]
    predicted_probs = result["video"]["pred"]
    predicted_labels = result["video"]["pred_label"]

    # Binarize: "FAKE" -> 1, anything else -> 0; probabilities thresholded at 0.5.
    prob_labels = [1 if p >= 0.5 else 0 for p in predicted_probs]
    p_labels = [1 if label == "FAKE" else 0 for label in predicted_labels]
    a_labels = [1 if label == "FAKE" else 0 for label in actual_labels]

    # ROC / AUC are computed from the raw probabilities; F1 from the
    # 0.5-thresholded predictions.
    fpr, tpr, thresholds = roc_curve(a_labels, predicted_probs)
    roc_auc = roc_auc_score(a_labels, predicted_probs)
    f1 = f1_score(a_labels, prob_labels)

    fpr_list.append(fpr)
    tpr_list.append(tpr)
    roc_auc_list.append(roc_auc)

    # Overall and per-class accuracy. Guard the per-class divisors so a
    # result file containing only one class does not crash the script.
    n_real = a_labels.count(0)
    n_fake = a_labels.count(1)
    accuracy = sum(x == y for x, y in zip(p_labels, a_labels)) / len(p_labels)
    real_acc = (
        sum(x == y == 0 for x, y in zip(p_labels, a_labels)) / n_real
        if n_real
        else float("nan")
    )
    fake_acc = (
        sum(x == y == 1 for x, y in zip(p_labels, a_labels)) / n_fake
        if n_fake
        else float("nan")
    )

    # Dataset name, e.g. "Celeb-DF" from "result/data_april14_Celeb-DF.json".
    dataset = json_file[:-5].split("_")[-1]
    print(
        f"{dataset}:\nReal accuracy {real_acc*100:.3f} Fake accuracy {fake_acc*100:.3f}, Accuracy: {accuracy*100:.3f}"
    )
    print(f"ROC AUC: {roc_auc:.3f}")
    print(f"F1 Score: {f1:.3f}\n")

# Plot every dataset's ROC curve on a single figure.
plt.figure()
for i in range(len(json_files)):
    plt.plot(
        fpr_list[i],
        tpr_list[i],
        label=f"{json_files[i][:-5].split('_')[-1]} (area = {roc_auc_list[i]:0.3f})",
    )

plt.plot([0, 1], [0, 1], "k--")  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic (ROC) Curve")
plt.legend(loc="lower right")
plt.show()
wzk101 commented 8 months ago

Hello @wzk101,

I've uploaded the results of our model detection to the result folder. Below is the code I've used to compute the AUC. If you wish to compute the AUC for your own detection, you can use the code I've provided below. Enjoy!

import os
import json
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, f1_score

# Detection-result files to evaluate, one JSON file per benchmark dataset.
json_files = [
    os.path.join("result", "data_april14_Celeb-DF.json"),
    os.path.join("result", "data_april14_DFDC.json"),
    os.path.join("result", "data_april11_DeepfakeTIMIT.json"),
    os.path.join("result", "data_april14_FF++.json"),
]

# ROC data accumulated per dataset so all curves share one figure below.
fpr_list = []
tpr_list = []
roc_auc_list = []

for json_file in json_files:
    with open(json_file, "r") as f:
        result = json.load(f)

    # Ground-truth labels, predicted fake-probabilities, and predicted labels
    # for each video in this dataset's result file.
    actual_labels = result["video"]["correct_label"]
    predicted_probs = result["video"]["pred"]
    predicted_labels = result["video"]["pred_label"]

    # Binarize: "FAKE" -> 1, anything else -> 0; probabilities cut at 0.5.
    prob_labels = [1 if p >= 0.5 else 0 for p in predicted_probs]
    p_labels = [1 if label == "FAKE" else 0 for label in predicted_labels]
    a_labels = [1 if label == "FAKE" else 0 for label in actual_labels]

    # ROC / AUC use the raw probabilities; F1 uses thresholded predictions.
    fpr, tpr, thresholds = roc_curve(a_labels, predicted_probs)
    roc_auc = roc_auc_score(a_labels, predicted_probs)
    f1 = f1_score(a_labels, prob_labels)

    fpr_list.append(fpr)
    tpr_list.append(tpr)
    roc_auc_list.append(roc_auc)

    # Overall and per-class accuracy; guard divisors so a single-class
    # result file does not raise ZeroDivisionError.
    n_real = a_labels.count(0)
    n_fake = a_labels.count(1)
    accuracy = sum(x == y for x, y in zip(p_labels, a_labels)) / len(p_labels)
    real_acc = (
        sum(x == y == 0 for x, y in zip(p_labels, a_labels)) / n_real
        if n_real
        else float("nan")
    )
    fake_acc = (
        sum(x == y == 1 for x, y in zip(p_labels, a_labels)) / n_fake
        if n_fake
        else float("nan")
    )

    # Dataset name, e.g. "DFDC" from "result/data_april14_DFDC.json".
    dataset = json_file[:-5].split("_")[-1]
    print(
        f"{dataset}:\nReal accuracy {real_acc*100:.3f} Fake accuracy {fake_acc*100:.3f}, Accuracy: {accuracy*100:.3f}"
    )
    print(f"ROC AUC: {roc_auc:.3f}")
    print(f"F1 Score: {f1:.3f}\n")

# One figure with all datasets' ROC curves overlaid.
plt.figure()
for i in range(len(json_files)):
    plt.plot(
        fpr_list[i],
        tpr_list[i],
        label=f"{json_files[i][:-5].split('_')[-1]} (area = {roc_auc_list[i]:0.3f})",
    )

plt.plot([0, 1], [0, 1], "k--")  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic (ROC) Curve")
plt.legend(loc="lower right")
plt.show()

Thanks a lot. I tested the model on the LQ FF++ set, and the AUC was good while the accuracy was poor. Is that normal? On Celeb-DF and HQ FF++ the model is awesome.

erprogs commented 8 months ago

Which FF++ sub class did you test? can you post the result you get?

wzk101 commented 8 months ago

Which FF++ sub class did you test? can you post the result you get?

YouTube original, Deepfakes, Face2Face, NeuralTextures and FaceSwap. I ran detection at the video level with 15 frames per video, and got ACC: 0.4856, AUC: 0.884.

erprogs commented 8 months ago

That's a very low accuracy rate. Maybe the calculation of the accuracy is incorrect? I'll test it on my system later today or tomorrow.