Closed BakingBrains closed 2 years ago
This is my way to generate the embeddings.
# Generate per-image BERT text embeddings for the CUB_200_2011 dataset.
#
# Expects in scope: `os`, `pickle`, a HuggingFace `tokenizer` and `model`,
# and the path variables `root`, `text_root`, `embedding_root`.
# For each image listed in images.txt, reads its caption file, encodes the
# captions (padded/truncated to 32 tokens), and pickles three arrays:
#   embedding_mean  - pooled output averaged over captions, shape (1, hidden)
#   embedding_full  - pooled output per caption, shape (n_captions, hidden)
#   embedding_words - token-level hidden states, shape (n_captions, 32, hidden)
# NOTE(review): consider wrapping the forward pass in torch.no_grad() to save
# memory — confirm torch is imported at file top.
with open(os.path.join(root, 'CUB_200_2011', 'images.txt'), 'r', encoding='utf-8') as f:
    for line in f:
        image_id, file_name = line.split()
        text_file = text_root + file_name.replace('.jpg', '.txt')
        text_list = []
        with open(text_file, 'r', encoding='utf-8', errors='replace') as f_text:
            for caption in f_text:
                # Some caption files contain mojibake that decodes to pairs of
                # U+FFFD replacement characters; collapse each pair to a space.
                # (Equivalent to the old encode/replace-bytes/decode round-trip.)
                text_list.append(caption.replace('\ufffd\ufffd', ' '))
        inputs = tokenizer(text_list, return_tensors="pt", padding="max_length",
                           truncation=True, max_length=32)
        outputs = model(**inputs)
        # outputs[1] is the pooled (per-sentence) output; outputs[0] the
        # token-level hidden states.
        embedding_mean = outputs[1].mean(dim=0).reshape(1, -1).detach().numpy()
        embedding_full = outputs[1].detach().numpy()
        embedding_words = outputs[0].detach().numpy()
        data_dict = {
            'embedding_mean': embedding_mean,
            'embedding_full': embedding_full,
            'embedding_words': embedding_words,
        }
        class_name, image_name = file_name.split('/')
        # exist_ok avoids the check-then-create race of the old
        # `if not os.path.exists(...)` guard.
        os.makedirs(os.path.join(embedding_root, class_name), exist_ok=True)
        embedding_file_path = os.path.join(
            embedding_root, class_name, image_name.replace('.jpg', '.pickle'))
        with open(embedding_file_path, 'wb') as f_write:
            pickle.dump(data_dict, f_write)
Moreover, bert_embedding is the generated embedding for CUB. Hope it helps you!
Thanks a lot @dqshuai. Awesome work. This solved my doubt.
Thank You
Can you please confirm whether this is the right way to generate the embeddings?
Thank You