This is an open-source learning pipeline containing multiple components for WSI and ROI foundation models.
The licenses for the imported code follow those of the original code.
The following steps assume an NVIDIA A100 Tensor Core GPU machine with the CUDA toolkit enabled.
Download our repository and change into its directory
git clone https://github.com/sagizty/BigModel.git
cd BigModel
Install dependencies
conda env create -f environment.yaml
conda activate BigModel
pip install -e .
Tile Cropping
python Tiles_dataset.py \
--WSI_dataset_path /data/hdd_1/BigModel/TCGA-LUAD-LUSC/TCGA-LUAD-raw \
--tiled_WSI_dataset_path /data/hdd_1/BigModel/TCGA-LUAD-LUSC/tiles_datasets \
--edge_size 224 \
--target_mpp 0.5
Tile Embedding
python Embedded_dataset.py \
--WSI_dataset_path /data/hdd_1/BigModel/TCGA-LUAD-LUSC/tiles_datasets \
--embedded_WSI_dataset_path /data/hdd_1/BigModel/TCGA-LUAD-LUSC/slide_embeddings/gigapath \
--model_name gigapath \
--edge_size 224 \
--PrefetchDataLoader_num_workers 10 \
--batch_size 256
Build MTL dataset for WSI
python DownStream/MTL/slide_dataset_tools.py \
--root_path /data/hdd_1/BigModel/embedded_datasets/TCGA-LUAD-LUSC-gigapath \
--task_description_csv /home/workenv/PuzzleAI/Archive/dataset_csv/TCGA_Log_Transcriptome_Final.csv \
--slide_id_key patient_id \
--split_target_key fold_information \
--task_setting_folder_name task-settings \
--mode TCGA \
--dataset_name luad-lusc
Run the MTL task with the WSI MTL framework
# Train
python DownStream/WSI_finetune/MTL_Train.py \
--model_name gigapath \
--root_path /data/ssd_1/CPIA_processed/embedded_datasets/TCGA-COAD \
--local_weight_path /home/workenv/PuzzleAI/ModelWeight/prov-gigapath/slide_encoder.pth \
--save_model_path /data/private/BigModel/saved_models \
--runs_path /data/private/BigModel/runs \
--task_description_csv /home/workenv/PuzzleAI/Archive/dataset_csv/TCGA_Log_Transcriptome_Final.csv \
--task_setting_folder_name task-settings \
--slide_id_key patient_id \
--split_target_key fold_information \
--num_epochs 100 \
--warmup_epochs 10 \
--intake_epochs 50
# Test
python DownStream/WSI_finetune/MTL_Test.py \
--model_name gigapath \
--root_path /data/ssd_1/CPIA_processed/embedded_datasets/TCGA-COAD \
--save_model_path /data/private/BigModel/saved_models \
--runs_path /data/private/BigModel/runs \
--task_description_csv /home/workenv/PuzzleAI/Archive/dataset_csv/TCGA_Log_Transcriptome_Final.csv \
--task_setting_folder_name task-settings \
--slide_id_key patient_id \
--split_target_key fold_information
# Decode the test results to csv
python Utils/Decode_correlation.py \
--model_name gigapath \
--root_path /data/ssd_1/CPIA_processed/embedded_datasets/TCGA-COAD \
--runs_path /data/private/BigModel/runs \
--WSI_tasks True \
--task_setting_folder_name task-settings
# TODO: add a demo here
# todo
# todo
# todo
# todo
# todo