Open Raising-hrx opened 4 years ago
where is the feature data???
Hi! I can not find features on data/img_feats1.0/. Can you provide them?
Hi! I can not find features on data/img_feats1.0/. Can you provide them?
As a workaround, I used the features released by VisDial-BERT here.
I modified the ImageFeaturesHdfReader
class in vdbert/seq2seq_loader.py
to the following code, which is mostly stitched together from this repo and the VisDial-BERT repo. Since I haven't benchmarked the model's performance, use this at your own risk.
18 import lmdb
19 import base64
20
21
class ImageFeaturesHdfReader(object):
    """
    A reader for pre-extracted image features stored in an LMDB database.

    The class keeps its historical ``Hdf`` name and ``features_hdfpath``
    parameter so existing callers keep working, but the store opened here is
    an LMDB environment, not an HDF5 file.

    Layout this reader relies on:
    ```
    <lmdb environment>
    |--- b"keys"     -> pickled list of image-id keys (bytes)
    +--- <image-id>  -> pickled dict with
         |--- "num_boxes"
         |--- "features" [base64 float32 buffer, (num_boxes, 2048)]
         |--- "boxes"    [base64 float32 buffer, (num_boxes, 4)]
         +--- "cls_prob" [base64 float32 buffer, (num_boxes, 1601)]
    ```

    Parameters
    ----------
    features_hdfpath : str
        Path passed to ``lmdb.open``.
    in_memory : bool
        Whether to keep every decoded record in RAM after it has been read
        once. Records are cached lazily on first access rather than loaded up
        front. When false, nothing is cached and each lookup reads from the
        database again - trade-off between speed and memory.
    """

    # Per-box widths of the arrays stored for each image.
    _FEATURE_SIZE = 2048
    _BOX_SIZE = 4
    _NUM_CLASSES = 1601

    def __init__(self, features_hdfpath: str, in_memory: bool = False):
        self.features_hdfpath = features_hdfpath
        self._in_memory = in_memory
        # No split metadata is read from the store, so ``split`` reports None
        # instead of failing on a missing attribute.
        self._split = None

        self.env = lmdb.open(
            features_hdfpath,
            max_readers=1,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False
        )

        with self.env.begin(write=False) as file_ptr:
            # NOTE(security): pickle.loads can execute arbitrary code; only
            # open feature databases obtained from a trusted source.
            self._image_id_list = pickle.loads(file_ptr.get('keys'.encode()))

        # Key -> position map so lookups do not scan the whole key list.
        # setdefault keeps the first position, matching ``list.index``.
        self._index_by_key = {}
        for position, key in enumerate(self._image_id_list):
            self._index_by_key.setdefault(key, position)

        # Per-image caches, filled only when ``in_memory`` is true; an entry
        # stays None until that image has been read once.
        num_images = len(self._image_id_list)
        self.features = [None] * num_images
        self.boxes = [None] * num_images
        self.classes = [None] * num_images
        self.scores = [None] * num_images

    def __len__(self):
        """Return the number of image ids listed under the ``keys`` entry."""
        return len(self._image_id_list)

    def __getitem__(self, image_id: int):
        """
        Return ``(features, boxes, classes, scores)`` for one image.

        ``features`` has shape (num_boxes, 2048) and ``boxes`` has shape
        (num_boxes, 4). ``classes`` and ``scores`` are the argmax and max of
        the stored class probabilities along the class axis.

        Raises
        ------
        ValueError
            If ``image_id`` is not among the stored keys.
        """
        key = str(image_id).encode()
        try:
            index = self._index_by_key[key]
        except KeyError:
            # ValueError is what the earlier ``list.index`` lookup raised.
            raise ValueError(
                "{!r} is not in the image feature store".format(key)
            ) from None

        if not self._in_memory:
            # Read from the database every time and retain nothing.
            return self._read_record(key)

        if self.features[index] is None:
            # First access to this image: decode once and remember it.
            (
                self.features[index],
                self.boxes[index],
                self.classes[index],
                self.scores[index],
            ) = self._read_record(key)

        return (
            self.features[index],
            self.boxes[index],
            self.classes[index],
            self.scores[index],
        )

    def _read_record(self, key):
        """Fetch and decode the record stored under ``key`` (bytes)."""
        with self.env.begin(write=False) as file_ptr:
            # NOTE(security): see the pickle warning in ``__init__``.
            item = pickle.loads(file_ptr.get(key))

        num_boxes = int(item["num_boxes"])
        image_id_features = self._decode_array(
            item["features"], num_boxes, self._FEATURE_SIZE
        )
        boxes = self._decode_array(item["boxes"], num_boxes, self._BOX_SIZE)
        class_probs = self._decode_array(
            item["cls_prob"], num_boxes, self._NUM_CLASSES
        )

        # Collapse the per-class distribution to the best label and its score.
        single_class = np.argmax(class_probs, axis=1)
        single_score = np.max(class_probs, axis=1)
        return image_id_features, boxes, single_class, single_score

    @staticmethod
    def _decode_array(encoded, num_boxes, width):
        """Decode a base64 float32 buffer into a (num_boxes, width) array."""
        return np.frombuffer(
            base64.b64decode(encoded), dtype=np.float32
        ).reshape(num_boxes, width)

    def keys(self) -> List[bytes]:
        """Return the stored image-id keys, as read from the ``keys`` entry."""
        return self._image_id_list

    @property
    def split(self):
        """Dataset split; None because this reader reads no split metadata."""
        return self._split
Hi! Can you provide the download links for "the bottom-up attention visual features of VisDial v1.0"? I cannot find these features in data/img_feats1.0/, and they are necessary for running vdbert/train_visdial.py.