Closed chenller closed 2 months ago
evaluate_instance_metrics.py
{
"segm": {
"AP50": 55.84,
"AP50,0-100": 41.39,
"AP50,100-200": 77.54,
"AP50,200-400": 76.19,
"AP50,400-inf": 88.15,
"AR50": 70.98,
"AR50,0-100": 58.53,
"AR50,100-200": 83.74,
"AR50,200-400": 79.95,
"AR50,400-inf": 88.61,
"AP75": 21.67,
"AP75,0-100": 13.34,
"AP75,100-200": 32.23,
"AP75,200-400": 45.83,
"AP75,400-inf": 82.05,
"AR75": 37.67,
"AR75,0-100": 26.28,
"AR75,100-200": 40.91,
"AR75,200-400": 52.82,
"AR75,400-inf": 82.91,
"AP": 26.73,
"AP,0-100": 18.71,
"AP,100-200": 38.19,
"AP,200-400": 44.71,
"AP,400-inf": 66.13,
"AR": 38.99,
"AR,0-100": 29.57,
"AR,100-200": 44.72,
"AR,200-400": 49.67,
"AR,400-inf": 67.07,
"AP50_1-Layer": 60.09,
"AP50_2-Layer": 53.45,
"AP50_3-Layer": 53.99,
"AP_1-Layer": 29.54,
"AP_2-Layer": 24.84,
"AP_3-Layer": 25.82,
"AP50,0-100_1-Layer": 50.31,
"AP50,0-100_2-Layer": 38.72,
"AP50,0-100_3-Layer": 35.12,
"AP,0-100_1-Layer": 23.62,
"AP,0-100_2-Layer": 16.85,
"AP,0-100_3-Layer": 15.66,
"AP50,100-200_1-Layer": 77.73,
"AP50,100-200_2-Layer": 72.4,
"AP50,100-200_3-Layer": 82.5,
"AP,100-200_1-Layer": 40.01,
"AP,100-200_2-Layer": 35.72,
"AP,100-200_3-Layer": 38.83,
"AP50,200-400_1-Layer": 77.6,
"AP50,200-400_2-Layer": 58.79,
"AP50,200-400_3-Layer": 92.19,
"AP,200-400_1-Layer": 51.49,
"AP,200-400_2-Layer": 30.22,
"AP,200-400_3-Layer": 52.43,
"AP50,400-inf_1-Layer": 100.0,
"AP50,400-inf_2-Layer": 84.16,
"AP50,400-inf_3-Layer": 80.3,
"AP,400-inf_1-Layer": 80.0,
"AP,400-inf_2-Layer": 59.7,
"AP,400-inf_3-Layer": 58.7,
"AR50_1-Layer": 67.86,
"AR50_2-Layer": 64.39,
"AR50_3-Layer": 80.7,
"AR_1-Layer": 37.23,
"AR_2-Layer": 35.0,
"AR_3-Layer": 44.74,
"AR50,0-100_1-Layer": 58.46,
"AR50,0-100_2-Layer": 53.97,
"AR50,0-100_3-Layer": 63.16,
"AR,0-100_1-Layer": 30.54,
"AR,0-100_2-Layer": 26.98,
"AR,0-100_3-Layer": 31.18,
"AR50,100-200_1-Layer": 80.56,
"AR50,100-200_2-Layer": 75.0,
"AR50,100-200_3-Layer": 95.65,
"AR,100-200_1-Layer": 43.19,
"AR,100-200_2-Layer": 39.89,
"AR,100-200_3-Layer": 51.09,
"AR50,200-400_1-Layer": 80.95,
"AR50,200-400_2-Layer": 61.54,
"AR50,200-400_3-Layer": 97.37,
"AR,200-400_1-Layer": 56.19,
"AR,200-400_2-Layer": 33.08,
"AR,200-400_3-Layer": 59.74,
"AR50,400-inf_1-Layer": 100.0,
"AR50,400-inf_2-Layer": 84.0,
"AR50,400-inf_3-Layer": 81.82,
"AR,400-inf_1-Layer": 80.0,
"AR,400-inf_2-Layer": 61.2,
"AR,400-inf_3-Layer": 60.0
}
}
{
"segm": {
"AP50": 55.46,
"AP50,0-100": 36.3,
"AP50,100-200": 80.63,
"AP50,200-400": 90.35,
"AP50,400-inf": 95.49,
"AR50": 85.62,
"AR50,0-100": 75.75,
"AR50,100-200": 88.47,
"AR50,200-400": 94.06,
"AR50,400-inf": 98.17,
"AP75": 36.12,
"AP75,0-100": 16.52,
"AP75,100-200": 58.72,
"AP75,200-400": 80.24,
"AP75,400-inf": 92.35,
"AR75": 65.61,
"AR75,0-100": 45.35,
"AR75,100-200": 66.99,
"AR75,200-400": 84.95,
"AR75,400-inf": 95.64,
"AP": 33.27,
"AP,0-100": 18.73,
"AP,100-200": 49.95,
"AP,200-400": 65.62,
"AP,400-inf": 79.64,
"AR": 57.92,
"AR,0-100": 43.27,
"AR,100-200": 57.36,
"AR,200-400": 70.22,
"AR,400-inf": 84.15,
"AP50_1-Layer": 48.62,
"AP50_2-Layer": 55.42,
"AP50_3-Layer": 59.21,
"AP50_4-Layer": 58.6,
"AP_1-Layer": 30.28,
"AP_2-Layer": 32.22,
"AP_3-Layer": 35.5,
"AP_4-Layer": 35.07,
"AP50,0-100_1-Layer": 31.61,
"AP50,0-100_2-Layer": 35.65,
"AP50,0-100_3-Layer": 42.12,
"AP50,0-100_4-Layer": 35.81,
"AP,0-100_1-Layer": 16.8,
"AP,0-100_2-Layer": 18.05,
"AP,0-100_3-Layer": 22.02,
"AP,0-100_4-Layer": 18.04,
"AP50,100-200_1-Layer": 84.93,
"AP50,100-200_2-Layer": 81.65,
"AP50,100-200_3-Layer": 74.23,
"AP50,100-200_4-Layer": 81.72,
"AP,100-200_1-Layer": 58.36,
"AP,100-200_2-Layer": 49.95,
"AP,100-200_3-Layer": 43.46,
"AP,100-200_4-Layer": 48.04,
"AP50,200-400_1-Layer": 88.26,
"AP50,200-400_2-Layer": 92.95,
"AP50,200-400_3-Layer": 89.13,
"AP50,200-400_4-Layer": 91.08,
"AP,200-400_1-Layer": 67.78,
"AP,200-400_2-Layer": 66.63,
"AP,200-400_3-Layer": 64.61,
"AP,200-400_4-Layer": 63.47,
"AP50,400-inf_1-Layer": 95.46,
"AP50,400-inf_2-Layer": 99.06,
"AP50,400-inf_3-Layer": 96.34,
"AP50,400-inf_4-Layer": 91.11,
"AP,400-inf_1-Layer": 82.14,
"AP,400-inf_2-Layer": 82.41,
"AP,400-inf_3-Layer": 80.34,
"AP,400-inf_4-Layer": 73.66,
"AR50_1-Layer": 89.13,
"AR50_2-Layer": 80.53,
"AR50_3-Layer": 84.15,
"AR50_4-Layer": 88.66,
"AR_1-Layer": 61.92,
"AR_2-Layer": 53.22,
"AR_3-Layer": 56.81,
"AR_4-Layer": 59.74,
"AR50,0-100_1-Layer": 82.7,
"AR50,0-100_2-Layer": 65.87,
"AR50,0-100_3-Layer": 75.72,
"AR50,0-100_4-Layer": 78.71,
"AR,0-100_1-Layer": 48.18,
"AR,0-100_2-Layer": 36.85,
"AR,0-100_3-Layer": 43.46,
"AR,0-100_4-Layer": 44.58,
"AR50,100-200_1-Layer": 93.3,
"AR50,100-200_2-Layer": 87.8,
"AR50,100-200_3-Layer": 82.0,
"AR50,100-200_4-Layer": 90.78,
"AR,100-200_1-Layer": 66.88,
"AR,100-200_2-Layer": 55.79,
"AR,100-200_3-Layer": 50.67,
"AR,100-200_4-Layer": 56.1,
"AR50,200-400_1-Layer": 92.59,
"AR50,200-400_2-Layer": 95.9,
"AR50,200-400_3-Layer": 91.15,
"AR50,200-400_4-Layer": 96.59,
"AR,200-400_1-Layer": 72.15,
"AR,200-400_2-Layer": 71.31,
"AR,200-400_3-Layer": 67.43,
"AR,200-400_4-Layer": 70.0,
"AR50,400-inf_1-Layer": 100.0,
"AR50,400-inf_2-Layer": 100.0,
"AR50,400-inf_3-Layer": 99.06,
"AR50,400-inf_4-Layer": 93.64,
"AR,400-inf_1-Layer": 88.67,
"AR,400-inf_2-Layer": 85.57,
"AR,400-inf_3-Layer": 84.81,
"AR,400-inf_4-Layer": 77.55
}
}
The AP and AR (Instance) Metrics are mostly the same as, if not slightly better than, the ones reported in the paper.
The values in brackets are from the paper, Table I. The small differences can be explained by random sampling with different random seeds. The metrics in the paper were obtained by training the model multiple times with different random seeds and reporting the mean.
Graphene
AP50_1-Layer: 48.62 (48.5)
AP50_2-Layer: 55.42 (55.0)
AP50_3-Layer: 59.21 (58.1)
AP50_4-Layer: 58.6 (58.0)
AR50_1-Layer: 89.13 (89.2)
AR50_2-Layer: 80.53 (80.3)
AR50_3-Layer: 84.15 (84.6)
AR50_4-Layer: 88.66 (88.4)
WSe2
AP50_1-Layer: 60.09 (62.1)
AP50_2-Layer: 53.45 (57.1)
AP50_3-Layer: 53.99 (52.8)
AR50_1-Layer: 67.86 (70.5)
AR50_2-Layer: 64.39 (66.9)
AR50_3-Layer: 80.7 (80.7)
I am currently rerunning the semantic evaluation and will get back to you.
I found the bug. The default parameters of the MaterialDetector were off. It ignored flakes which are smaller than 1000 pixels, leading to worse recall.
evaluate_semantic_metrics.py
...
# Read the contrast parameters
with open(contrast_path) as f:
contrast_dict = json.load(f)
# This is the error, the default size threshold is set to 1000, so it will ignore smaller flakes
# Setting this to the same as in evaluate_instance_metrics.py yields the same results as the paper
# myDetector = MaterialDetector(contrast_dict=contrast_dict)
myDetector = MaterialDetector(
contrast_dict=contrast_dict,
standard_deviation_threshold=5,
size_threshold=200,
used_channels="BGR",
)
# set up the confusion matrices
confusion_matrices = {
fp: ConfusionMatrix(
num_classes=NUM_CLASSES[material] + 1,
ignore_label=NUM_CLASSES[material] + 2,
)
for fp in FP_RANGE
}
...
I will update the code to reproduce the results better. Thanks for the heads-up, chenller!
I ran the new code, and now the new results are similar to those in the paper. I have an additional question: although the current research has achieved good results, there is still a lot of room for improvement. Will the authors continue to improve and optimize the code to achieve better results?
We are currently developing an improved version which builds on the current model. Sadly, I cannot tell you more, but be on the lookout for a new paper from us.
Looking forward to your new achievements. Thank you for your contribution.
Thank you very much for your work. I intended to reproduce the algorithm, but there was a serious deviation. After cloning the code, I ran a file called
evaluate_semantic_metrics.py
. I only changed the dataset path (variable DATASET-ROOT
in the script). I obtained the following results. There is a significant gap compared to the data in the paper, and I don't know why. Result:
Graphene_semantic_metrics.png WSe2_semantic_metrics.png