[Open] chamecall opened this issue 2 years ago
As far as I can see, the project is not meant to be used for real-time anomaly detection, because the model doesn't work with only one 16-frame sample:
```
Traceback (most recent call last):
  File "main.py", line 48, in <module>
    auc = test(test_loader, model, args, viz, device)
  File "/home/algernone/git-reps/RTFM/test_10crop.py", line 19, in test
    scores_nor_bottom, scores_nor_abn_bag, feat_magnitudes = model(inputs=input)
  File "/home/algernone/.pyenv/versions/3.7.13/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/algernone/git-reps/RTFM/model.py", line 236, in forward
    idx_abn = torch.topk(afea_magnitudes_drop, k_abn, dim=1)[1]
RuntimeError: selected index k out of range
```
Could you please confirm this?
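For reference, the crash is easy to reproduce in isolation: `torch.topk` raises exactly this error whenever `k` exceeds the size of the selected dimension, and the model asks for `k_abn = 32 // 10 = 3` segments. A minimal sketch, assuming a single 16-frame sample yields a temporal dimension of t = 1:

```python
import torch

# With a single 16-frame sample, afea_magnitudes_drop has shape (1, 1):
# one video, one temporal segment.
mag = torch.rand(1, 1)
k_abn = 32 // 10  # == 3, as set in Model.__init__

# Asking topk for 3 elements along a size-1 dimension reproduces the crash:
torch.topk(mag, k_abn, dim=1)  # RuntimeError: selected index k out of range
```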
Hi, yes. This is a typo: in the paper we incorrectly refer to inference time as 'real time'. So the model cannot really handle online real-time detection.
Thanks for confirming. I experimented with it anyway, though: I replaced your forward code
```python
import torch
import torch.nn as nn

# Aggregate and weight_init are defined elsewhere in RTFM's model.py
class Model(nn.Module):
    def __init__(self, n_features, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.num_segments = 32
        self.k_abn = self.num_segments // 10
        self.k_nor = self.num_segments // 10

        self.Aggregate = Aggregate(len_feature=2048)
        self.fc1 = nn.Linear(n_features, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 1)

        self.drop_out = nn.Dropout(0.7)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.apply(weight_init)

    def forward(self, inputs):
        k_abn = self.k_abn
        k_nor = self.k_nor

        out = inputs
        bs, ncrops, t, f = out.size()

        out = out.view(-1, t, f)
        out = self.Aggregate(out)
        out = self.drop_out(out)

        features = out
        scores = self.relu(self.fc1(features))
        scores = self.drop_out(scores)
        scores = self.relu(self.fc2(scores))
        scores = self.drop_out(scores)
        scores = self.sigmoid(self.fc3(scores))
        scores = scores.view(bs, ncrops, -1).mean(1)
        scores = scores.unsqueeze(dim=2)

        normal_features = features[0:self.batch_size * 10]
        normal_scores = scores[0:self.batch_size]
        abnormal_features = features[self.batch_size * 10:]
        abnormal_scores = scores[self.batch_size:]

        feat_magnitudes = torch.norm(features, p=2, dim=2)
        feat_magnitudes = feat_magnitudes.view(bs, ncrops, -1).mean(1)
        nfea_magnitudes = feat_magnitudes[0:self.batch_size]  # normal feature magnitudes
        afea_magnitudes = feat_magnitudes[self.batch_size:]   # abnormal feature magnitudes
        n_size = nfea_magnitudes.shape[0]

        if nfea_magnitudes.shape[0] == 1:  # this is for inference, the batch size is 1
            afea_magnitudes = nfea_magnitudes
            abnormal_scores = normal_scores
            abnormal_features = normal_features

        select_idx = torch.ones_like(nfea_magnitudes)
        select_idx = self.drop_out(select_idx)

        ####### process abnormal videos -> select top3 feature magnitude #######
        afea_magnitudes_drop = afea_magnitudes * select_idx
        idx_abn = torch.topk(afea_magnitudes_drop, k_abn, dim=1)[1]
        idx_abn_feat = idx_abn.unsqueeze(2).expand([-1, -1, abnormal_features.shape[2]])

        abnormal_features = abnormal_features.view(n_size, ncrops, t, f)
        abnormal_features = abnormal_features.permute(1, 0, 2, 3)

        total_select_abn_feature = torch.zeros(0, device=inputs.device)
        for abnormal_feature in abnormal_features:
            feat_select_abn = torch.gather(abnormal_feature, 1, idx_abn_feat)  # top 3 features magnitude in abnormal bag
            total_select_abn_feature = torch.cat((total_select_abn_feature, feat_select_abn))

        idx_abn_score = idx_abn.unsqueeze(2).expand([-1, -1, abnormal_scores.shape[2]])
        score_abnormal = torch.mean(torch.gather(abnormal_scores, 1, idx_abn_score), dim=1)  # top 3 scores in abnormal bag based on the top-3 magnitude

        ####### process normal videos -> select top3 feature magnitude #######
        select_idx_normal = torch.ones_like(nfea_magnitudes)
        select_idx_normal = self.drop_out(select_idx_normal)
        nfea_magnitudes_drop = nfea_magnitudes * select_idx_normal
        idx_normal = torch.topk(nfea_magnitudes_drop, k_nor, dim=1)[1]
        idx_normal_feat = idx_normal.unsqueeze(2).expand([-1, -1, normal_features.shape[2]])

        normal_features = normal_features.view(n_size, ncrops, t, f)
        normal_features = normal_features.permute(1, 0, 2, 3)

        total_select_nor_feature = torch.zeros(0, device=inputs.device)
        for nor_fea in normal_features:
            feat_select_normal = torch.gather(nor_fea, 1, idx_normal_feat)  # top 3 features magnitude in normal bag (hard negative)
            total_select_nor_feature = torch.cat((total_select_nor_feature, feat_select_normal))

        idx_normal_score = idx_normal.unsqueeze(2).expand([-1, -1, normal_scores.shape[2]])
        score_normal = torch.mean(torch.gather(normal_scores, 1, idx_normal_score), dim=1)  # top 3 scores in normal bag

        feat_select_abn = total_select_abn_feature
        feat_select_normal = total_select_nor_feature

        return score_abnormal, score_normal, feat_select_abn, feat_select_normal, feat_select_abn, feat_select_abn, scores, feat_select_abn, feat_select_abn, feat_magnitudes
```
with
```python
    def forward(self, inputs):
        out = inputs
        bs, ncrops, t, f = out.size()

        out = out.view(-1, t, f)
        out = self.Aggregate(out)
        out = self.drop_out(out)

        features = out
        scores = self.relu(self.fc1(features))
        scores = self.drop_out(scores)
        scores = self.relu(self.fc2(scores))
        scores = self.drop_out(scores)
        scores = self.sigmoid(self.fc3(scores))
        scores = scores.view(bs, ncrops, -1).mean(1)
        scores = scores.unsqueeze(dim=2)
        return scores
```
to obtain an anomaly score for a single 16-frame sample.
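For illustration, a rough sketch of how this stripped-down forward could be driven online, one snippet at a time. This assumes precomputed 10-crop I3D features, 2048-d per 16-frame snippet; `snippet_features` is a hypothetical iterable yielding one `(10, 2048)` tensor per snippet:

```python
import torch

model.eval()  # disable dropout for inference
scores = []
with torch.no_grad():
    for feat in snippet_features:        # hypothetical: one (10, 2048) tensor per 16-frame snippet
        inp = feat.view(1, 10, 1, 2048)  # (bs, ncrops, t, f) with bs = 1, t = 1
        s = model(inputs=inp)            # (1, 1, 1): score averaged over the 10 crops
        scores.append(s.item())
```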
I compared these two approaches (I guess we can call them offline and online): in the first case I feed the features of the whole video (Arrest/Arrest001_x264.mp4) at once (all 16-frame samples), and in the second I feed one 16-frame sample at a time and then just concatenate the results. These are the visualized offline, online and ground-truth scores, respectively.
The online result is worse than the offline one, but there is definitely a useful signal. The only question is whether it only works on this particular video, or whether it will keep working in general in this slightly worse manner compared to the offline approach.
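For concreteness, the comparison described above could be sketched like this (an assumption-laden sketch: `feats` would hold the whole video's 10-crop features with shape `(1, 10, T, 2048)`, where T is the number of 16-frame snippets):

```python
import torch

with torch.no_grad():
    # offline: the model sees all T snippets at once and can use temporal context
    offline = model(inputs=feats).squeeze()  # (T,)

    # online: each snippet is scored in isolation, then the results are concatenated
    online = torch.cat(
        [model(inputs=feats[:, :, i:i + 1, :]) for i in range(feats.shape[2])],
        dim=1,
    ).squeeze()  # (T,)
```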
How do you feed 16 frames to the model? n_features is 2048, so how do you turn the input frames into features that fit the model? Thanks.