Open-source python package for the extraction of Radiomics features from 2D and 3D images and binary masks. Support: https://discourse.slicer.org/c/community/radiomics
Description
I use PyRadiomics to extract voxel-based features and noticed that it is extremely slow, especially GLCM features, like https://github.com/AIM-Harvard/pyradiomics/issues/679.
I use pyinstrument to profile extraction and I found that numpy.delete and ndarray.copy takes lots of time.
import logging
import warnings
from typing import List, Type
import numpy as np
import radiomics
import radiomics.base
import SimpleITK as sitk
import torch
from pyinstrument import Profiler
from radiomics import glcm
from radiomics.featureextractor import RadiomicsFeatureExtractor
from radiomics.imageoperations import checkMask, cropToTumorMask
from tqdm import tqdm
from RadiomicsGLCM2 import RadiomicsGLCM2
def get_default_features():
return ['Autocorrelation', 'JointAverage', 'ClusterProminence', 'ClusterShade',
'ClusterTendency', 'Contrast', 'Correlation', 'DifferenceAverage', 'DifferenceEntropy',
'DifferenceVariance', 'JointEnergy', 'JointEntropy', 'Imc1', 'Imc2',
'Idm', 'Idmn', 'Id', 'Idn', 'InverseVariance', 'MaximumProbability', 'SumEntropy', 'SumSquares']
def get_default_settings():
settings = {}
geometryTolerance = 1e-1
settings["geometryTolerance"] = geometryTolerance
sitk.ProcessObject.SetGlobalDefaultCoordinateTolerance(geometryTolerance)
sitk.ProcessObject.SetGlobalDefaultDirectionTolerance(geometryTolerance)
settings["normalize"] = True
settings["normalizeScale"] = 100
settings["removeOutliers"] = None
settings["resampledPixelSpacing"] = None # We have already resampled
# settings["resampledPixelSpacing"] = [1, 1, 1]
settings["interpolator"] = sitk.sitkBSpline
settings["binWidth"] = 5
settings["voxelArrayShift"] = 300
settings["enableCExtensions"] = True
return settings
def crop(img, mask, ker):
settings = get_default_settings()
boundingBox, _ = checkMask(img, mask, **settings)
return cropToTumorMask(img, mask, boundingBox, padDistance=ker, **settings)
def diff(a: dict, b: dict):
assert len(a.keys()) == len(b.keys())
pairs = [(sitk.GetArrayFromImage(a[n]), sitk.GetArrayFromImage(b[n])) for n in a.keys()]
print("all close:", all(np.allclose(x, y) for x, y in pairs))
print("max abs diff:", max(np.max(np.abs((x - y))) for x, y in pairs))
print("max rel diff1:", max(np.nanmax(np.abs((1 - x / y))) for x, y in pairs))
print("max rel diff2:", max(np.nanmax(np.abs((1 - y / x))) for x, y in pairs))
def test(img, mask, ker,
typeA: Type[radiomics.base.RadiomicsFeaturesBase],
typeB: Type[radiomics.base.RadiomicsFeaturesBase],
features: List[str]):
rf_ext = RadiomicsFeatureExtractor(
voxelBased=True, padDistance=ker,
kernelRadius=ker, maskedKernel=False, voxelBatch=512,
**get_default_settings())
img_norm, mask_norm = rf_ext.loadImage(img, mask, None, **rf_ext.settings)
a_ext = typeA(
img_norm, mask_norm,
voxelBased=True, padDistance=ker,
kernelRadius=ker, maskedKernel=False, voxelBatch=1024,
**get_default_settings())
if features is not None:
a_ext.disableAllFeatures()
for n in features:
a_ext.enableFeatureByName(n)
b_ext = typeB(
img_norm, mask_norm,
voxelBased=True, padDistance=ker,
kernelRadius=ker, maskedKernel=False, voxelBatch=1024,
dtype=torch.float64,
**get_default_settings())
if features is not None:
b_ext.disableAllFeatures()
for n in features:
b_ext.enableFeatureByName(n)
profiler1 = Profiler()
profiler1.start()
a = a_ext.execute()
profiler1.stop()
profiler1.open_in_browser()
profiler2 = Profiler()
profiler2.start()
b = b_ext.execute()
profiler2.stop()
profiler2.open_in_browser()
diff(a, b)
if __name__ == "__main__":
warnings.filterwarnings("ignore", message=".*OMP_NUM_THREADS=\d.*")
warnings.filterwarnings("ignore", message=".*invalid value encountered in divide.*")
radiomics.setVerbosity(logging.INFO) # Verbosity must be at least INFO to enable progress bar
radiomics.progressReporter = tqdm
kernel = 1
size = 24
img = sitk.GetImageFromArray(np.random.randn(size, size, size))
mask = sitk.GetImageFromArray(np.ones((size, size, size)))
test(img, mask, kernel, glcm.RadiomicsGLCM, RadiomicsGLCM2, get_default_features())
RadiomicsGLCM2 is the modified version.
Version (please complete the following information):
OS: Windows 11
Python version: 3.10
PyRadiomics version: 3.1.0
Additional context
Obviously, this PR cannot fully solve this problem.
So, I try to use GPU(PyTorch) to accelerate: pytorchradiomics
Using pytorch gains about 20x~30x acceleration in voxel-based GLCM extraction.
I am not sure it should be included in pyradiomics or just a standalone package.
Any suggestions?
Description I use PyRadiomics to extract voxel-based features and noticed that it is extremely slow, especially GLCM features, like https://github.com/AIM-Harvard/pyradiomics/issues/679. I use pyinstrument to profile extraction and I found that
numpy.delete
andndarray.copy
takes lots of time.so I try to speed up and made a PR(https://github.com/AIM-Harvard/pyradiomics/pull/882). After this modification, I got improvement of 14s:
with very small error:
PyRadiomics configuration NA
PyRadiomics log file
To Reproduce
RadiomicsGLCM2
is the modified version.Version (please complete the following information):
Additional context Obviously, this PR cannot fully solve this problem. So, I try to use GPU(PyTorch) to accelerate: pytorchradiomics Using pytorch gains about 20x~30x acceleration in voxel-based GLCM extraction. I am not sure it should be included in pyradiomics or just a standalone package. Any suggestions?