Open Coldermyy opened 1 week ago
You need to modify the code as shown here: https://github.com/ibrahimethemhamamci/CT-CLIP/tree/main/scripts#volume-to-volume-and-report-to-volume-retrieval, and save the latents as .npz files. Honestly, it is very strange for an open-source repo to tell you: "you need to implement the function according to our tutorial to reproduce our results"...
You need to change the code, run the "zero_shot.py" to generate the latents first, then use the latents as the input of "volume_to_volume_new.py".
Actually, I came across many bugs in this repo with no reply from the author. When I have time, I will submit a PR to fix all of these issues.
You can reference my code here in zero_shot.py train step function:
def train_step(self):
    """Run one full pass over the validation dataset, extracting CT-CLIP
    text and image latents and saving them as per-accession ``.npz`` files.

    For every sample, the model is called with ``return_latents=True`` and
    the resulting latents are written to
    ``<result_folder_txt>/text/<accession>.npz`` and
    ``<result_folder_txt>/image/<accession>.npz`` (array key ``arr``).
    These files are the inputs expected by the volume-to-volume /
    report-to-volume retrieval scripts.

    Returns:
        dict: an empty logs dict (kept for trainer-loop compatibility).
    """
    device = self.device
    steps = int(self.steps.item())
    logs = {}

    # NOTE(review): both sub-folders intentionally live under
    # result_folder_txt — "image" latents are saved next to "text" ones.
    txt_latent_save_dir = os.path.join(self.result_folder_txt, "text")
    img_latent_save_dir = os.path.join(self.result_folder_txt, "image")
    os.makedirs(txt_latent_save_dir, exist_ok=True)
    os.makedirs(img_latent_save_dir, exist_ok=True)

    with torch.no_grad():
        models_to_evaluate = ((self.CTClip, str(steps)),)
        for model, _filename in models_to_evaluate:
            model.eval()
            for i in tqdm.tqdm(range(len(self.ds))):
                valid_data, text, onehotlabels, acc_name = next(self.dl_iter)
                print(f"i: {i}, acc_name: {acc_name}")

                plotdir = self.result_folder_txt
                Path(plotdir).mkdir(parents=True, exist_ok=True)

                text_tokens = self.tokenizer(
                    text,
                    return_tensors="pt",
                    padding="max_length",
                    truncation=True,
                    max_length=512,
                ).to(device)

                # BUG FIX: move the volume to the configured device instead
                # of hard-coding .cuda(), so non-default-GPU setups work.
                outs = model(
                    text_tokens,
                    valid_data.to(device),
                    device=device,
                    return_latents=True,
                )
                for j, out in enumerate(outs):
                    print(f"out shape: {out.shape}, j = {j}")
                text_latents, image_latents, *_ = outs

                # Assumes batch size 1: acc_name[0] names the output files.
                # TODO(review): confirm the dataloader always yields batch=1.
                txt_latent_save_path = os.path.join(
                    txt_latent_save_dir, f"{acc_name[0]}.npz"
                )
                img_latent_save_path = os.path.join(
                    img_latent_save_dir, f"{acc_name[0]}.npz"
                )
                np.savez(
                    txt_latent_save_path,
                    arr=text_latents.detach().cpu().numpy(),
                )
                np.savez(
                    img_latent_save_path,
                    arr=image_latents.detach().cpu().numpy(),
                )

    self.steps += 1
    return logs
Which files does `data_folder = "/path_to_valid_latents_folder/image/"` in `volume_to_volume_new.py` refer to — are they the per-accession image-latent `.npz` files generated by running `zero_shot.py`? And what functionality does the changed code add?