Are RandomForestClassifier OOB posteriors also biased without stratification?

We can test:

def test_random_forest_posteriors_on_independent():
    """Test regression from :gh:`283`.

    Posteriors were biased when the classes were independent and using the bootstrap and oob sample
    technique to estimate the final population test statistic. This resulted in a biased estimate
    of the AUC score. Stratification of the bootstrapping samples was the solution to this problem.

    For each of five seeds, a forest is fit on Gaussian-noise features with balanced binary
    labels, each tree's out-of-bag (oob) posteriors are collected, the posteriors are averaged
    per sample across trees, and the AUC of the averaged class-1 posterior is recorded. The
    test asserts that the mean AUC over the seeds lies strictly between 0.49 and 0.51.
    """
    import numpy as np
    from sklearn.metrics import roc_auc_score
    from sktree import RandomForestClassifier

    n_samples, n_features = 128, 4096
    scores = []
    for seed in range(5):
        # Features are noise drawn independently of the labels. A seeded generator
        # keeps each repetition reproducible.
        rng = np.random.default_rng(seed)
        X = rng.standard_normal(size=(n_samples, n_features))
        # Balanced labels: first half class 0, second half class 1. Kept 1-D so
        # ``fit`` does not emit a column-vector conversion warning.
        y = np.concatenate([np.zeros(n_samples // 2), np.ones(n_samples // 2)])
        clf = RandomForestClassifier(
            n_estimators=100,
            random_state=seed,
            bootstrap=True,
            max_samples=1.0,
            n_jobs=-1,
            # stratify=True,
        )
        clf.fit(X, y)

        # Start from NaN so that in-bag (sample, tree) slots are skipped by
        # ``np.nanmean`` below; an uninitialised buffer would leak arbitrary
        # values into the per-sample average.
        oob_posteriors = np.full((len(clf.estimators_), n_samples, 2), np.nan)
        tree_and_inbag = zip(clf.estimators_, clf.estimators_samples_)
        for tree_idx, (tree, inbag_idx) in enumerate(tree_and_inbag):
            # A sample is out-of-bag for this tree if it was never drawn in the
            # tree's bootstrap sample.
            oob_mask = np.ones(n_samples, dtype=bool)
            oob_mask[inbag_idx] = False
            oob_posteriors[tree_idx, oob_mask, :] = tree.predict_proba(X[oob_mask])

        # Average each sample's posterior over the trees for which it was oob and
        # score the class-1 column.
        auc_score = roc_auc_score(y, np.nanmean(oob_posteriors, axis=0)[:, 1])
        scores.append(auc_score)

    # Without stratification, this test should fail
    mean_score = np.mean(scores)
    print(mean_score, scores)
    assert 0.49 < mean_score < 0.51, f"{mean_score} {scores}"

neurodata / scikit-tree

Are RandomForestClassifier OOB posteriors also biased without stratification? #287