Also for nested resampling. Here's some code that one of us can start with:
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
# Demo: group-aware train/test resampling with GroupShuffleSplit.
#
# Toy data: 10 samples with identical features, so only the group labels
# matter here. Groups 4 and 5 each own two samples, which makes it easy to
# spot a split that tears a group apart.
X = np.array([[0, 1, 2]] * 10)
y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1])
groups = np.array([0, 1, 2, 3, 4, 4, 5, 5, 6, 7])

seed_value = 1234  # fixed seed so the printed splits are reproducible
n_splits = 30
test_size = 0.2

folds = GroupShuffleSplit(
    n_splits=n_splits, test_size=test_size, random_state=seed_value
)
splits = folds.split(X, y, groups=groups)

for train_index, test_index in splits:
    # X and y are already ndarrays, so index them directly.
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]

    train_groups = groups[train_index]
    test_groups = groups[test_index]

    # A group must never appear on both sides of a split: you shouldn't see
    # 4 in train and 4 in test (same with 5). Check it instead of relying on
    # eyeballing the printed output.
    shared_groups = set(train_groups.tolist()) & set(test_groups.tolist())
    if shared_groups:
        raise RuntimeError(
            f"groups present in both train and test: {sorted(shared_groups)}"
        )

    print(train_groups)
    print(test_groups)
    print('\n')
Also for nested resampling. Here's some code that one of us can start with: