Datasets interfere with each other when using pytest

I have an application which uses datalad-hirni internally for the bids conversion. I am currently writing tests for this application and came across a strange problem with individual tests interfering with each other. I managed to break it down to an example only using datalad-hirni (nothing from my application is used anymore).

In the first test, test_import, the data is imported and then the default rule template is imported (all inside one temp folder). In the second test, test_full (in a different temp folder), a whole conversion takes place: import data, import rule, convert. The second test fails. But strangely:

If test_full is run alone (first one is disabled), it works.
If instead of the rule template from hirni the full rule for the dataset is used in test_import, test_full works again.

That means that test_import is interfering with test_full although they are running in separate temporary directories and different datasets. How can that be?

Here my minimal example to reproduce it:

custom_rules_template.py is the rule template from here

custom_rules.py is a full rule for this dataset

import contextlib
import json
import os
from pathlib import Path
import shutil
import subprocess
import tempfile

import pytest

import datalad.api as datalad

# Absolute directory containing this file; the rule files used by the tests
# are looked up relative to it.
RULE_DIR = Path(__file__).absolute().parent

class ChangeWorkingDir(contextlib.ContextDecorator):
    """Temporarily change the working directory.

    Usable as a context manager or, through ``contextlib.ContextDecorator``,
    as a function decorator. The working directory that was active on entry
    is restored on exit, whether or not the body raised.

    Args:
        new_wd: Directory to switch to on entry.
        suppress: When true (the default, which keeps the historical
            behavior of this class), exceptions derived from ``Exception``
            that are raised inside the block are swallowed and execution
            continues after the block. Note that this includes
            ``AssertionError``. Pass ``False`` to let them propagate.

    Exceptions that do not derive from ``Exception`` (for example
    ``KeyboardInterrupt`` and ``SystemExit``) always propagate.
    """

    def __init__(self, new_wd, *, suppress=True):
        self.current_wd = None
        self.new_wd = new_wd
        self.suppress = suppress
        # One saved directory per active entry, so that nested or recursive
        # use of the same instance (possible when used as a decorator)
        # unwinds to the right directory at every level.
        self._wd_stack = []

    def __enter__(self):
        self.current_wd = Path.cwd()
        self._wd_stack.append(self.current_wd)
        try:
            os.chdir(self.new_wd)
        except BaseException:
            # __exit__ is not called when __enter__ fails; keep the stack
            # balanced ourselves.
            self._wd_stack.pop()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        os.chdir(self._wd_stack.pop())
        if exc_type is None:
            return False
        # Returning a true value tells Python the exception was handled.
        # Never do that for interpreter-level signals such as
        # KeyboardInterrupt or SystemExit.
        return bool(self.suppress) and issubclass(exc_type, Exception)

class SourceConfiguration:
    """Configure the datalad-hirni rule used for the BIDS conversion.

    Args:
        dataset_path: Root directory of the source dataset.

    The acquisition id is fixed to ``"bids_rule_config"`` and the studyspec
    file is expected at ``<dataset_path>/bids_rule_config/studyspec.json``.
    """

    def __init__(self, dataset_path):
        self.dataset_path = Path(dataset_path)
        self.dataset = datalad.Dataset(self.dataset_path)

        self.acqid = "bids_rule_config"
        self.spec_file = Path(self.dataset_path, self.acqid, "studyspec.json")

    def import_rule(self, rule):
        """Import a datalad-hirni rule file and regenerate the studyspec.

        Args:
            rule: Path to the rule file; a leading ``~`` is expanded.
        """
        # handle "~/" paths
        rule = Path(rule).expanduser().resolve()

        self._register_and_add_rule(rule)
        self._create_studyspec()

    def _register_and_add_rule(self, rule_template):
        """Register the rule in the dataset config and copy it into the dataset.

        Args:
            rule_template: Path of the rule file to copy to
                ``code/custom_rules/custom_rules.py`` inside the dataset.
        """
        rule_dir = Path("code/custom_rules")
        rule_file = rule_dir / "custom_rules.py"

        # Configuration values are strings, so hand over the path as text
        # rather than as a Path object.
        self.dataset.config.set("datalad.hirni.dicom2spec.rules",
                                str(rule_file), where="dataset")

        self._reset_studyspec()

        # create custom rule dir
        (self.dataset_path / rule_dir).mkdir(parents=True, exist_ok=True)

        shutil.copy(rule_template, self.dataset_path / rule_file)

        datalad.save(rule_file, dataset=self.dataset,
                     message="Add/modify custom rule", to_git=True)

    @staticmethod
    def _select_dicomseries_all(text):
        """Return the ``dicomseries:all`` entries of a JSON-lines studyspec.

        Args:
            text: Content of a studyspec file, one JSON object per line.

        Returns:
            The decoded objects whose ``"type"`` is ``"dicomseries:all"``,
            in file order. Blank lines are ignored, so empty content yields
            an empty list.
        """
        # Split on "\n" only: str.splitlines() would also break on
        # characters that may legitimately occur inside JSON strings.
        entries = (json.loads(line)
                   for line in text.split("\n") if line.strip())
        return [entry for entry in entries
                if entry["type"] == "dicomseries:all"]

    def _reset_studyspec(self):
        """Reset the studyspec to avoid problems with the next imported dataset.

        Keeps only the ``dicomseries:all`` entries of the studyspec file and
        saves the result. Does nothing when the file does not exist.
        """
        if not self.spec_file.exists():
            # Nothing to do
            return

        dicomseries_all = self._select_dicomseries_all(
            self.spec_file.read_text(encoding="utf-8")
        )

        # write dicomseries:all
        with self.spec_file.open("w", encoding="utf-8") as f:
            f.writelines(json.dumps(entry) + "\n"
                         for entry in dicomseries_all)

        datalad.save(path=self.spec_file, dataset=self.dataset,
                     message="Reset studyspec file")

    def _create_studyspec(self):
        """Run ``hirni_dicom2spec`` on the acquisition's ``dicoms`` directory."""
        spec = self.spec_file.relative_to(self.dataset_path)

        # Fix needed since dicom2spec only looks for rule file in current dir
        # and not in dataset dir.
        # NOTE(review): ChangeWorkingDir as defined in this file suppresses
        # exceptions raised inside the block, so a failing dicom2spec call
        # is not surfaced here.
        with ChangeWorkingDir(self.dataset_path):
            datalad.hirni_dicom2spec(
                path=str(Path(self.acqid, "dicoms")),
                spec=str(spec),
                dataset=self.dataset,
            )

def test_import(tmp_path):
    """Create a hirni dataset, import the data and then import a rule."""
    dataset_dir = tmp_path / "sourcedata"
    datalad.create(str(dataset_dir), cfg_proc="hirni")

    import_data(dataset_dir)

    # Reported behavior: importing RULE_DIR / "custom_rules_template.py"
    # here makes test_full fail; importing the full rule, as done below,
    # lets test_full pass.
    rule = RULE_DIR / "custom_rules.py"
    configuration = SourceConfiguration(dataset_dir)
    configuration.import_rule(rule)

def test_full(tmp_path):
    """Run a whole conversion: import data, import rule, convert to BIDS.

    The two ``datalad`` command-line calls use paths relative to the BIDS
    dataset, so they run with that dataset as working directory. The
    directory change is done with ``try``/``finally`` so that the exception
    ``run_cmd`` raises for a failing command (which carries the command's
    stderr) reaches pytest instead of being discarded.
    """
    source_dataset_path = tmp_path / "sourcedata"
    datalad.create(str(source_dataset_path), cfg_proc="hirni")

    import_data(source_dataset_path)
    SourceConfiguration(source_dataset_path).import_rule(
        RULE_DIR / "custom_rules.py"
    )

    bids_dataset_path = tmp_path / "bids"
    datalad.create(str(bids_dataset_path), cfg_proc="bids")

    install_cmd = ["datalad", "install",
                   "--dataset", str(bids_dataset_path),
                   "--source", str(source_dataset_path),
                   "sourcedata",
                   "--recursive"]
    convert_cmd = ["datalad", "hirni-spec2bids",
                   "--anonymize", "sourcedata/studyspec.json",
                   "sourcedata/bids_rule_config/studyspec.json"]

    previous_wd = Path.cwd()
    os.chdir(bids_dataset_path)
    try:
        run_cmd(install_cmd)
        run_cmd(convert_cmd)
    finally:
        os.chdir(previous_wd)

    conversion_dir = tmp_path / "bids/sub-bidsconfig"
    assert conversion_dir.exists(), (
        "expected conversion output at {}".format(conversion_dir)
    )

def import_data(source_dataset_path):
    """Import the DICOM archive into the dataset at *source_dataset_path*.

    Python-API counterpart of the command line call
        datalad hirni-import-dcm --anon-subject "$ANON" \
            ../../original/sourcedata.tar.gz sourcedata
    """
    archive = Path("<my_data_path>/sourcedata.tar.xz")
    with ChangeWorkingDir(source_dataset_path):
        target = datalad.Dataset(source_dataset_path)
        datalad.hirni_import_dcm(
            path=archive,
            acqid="bids_rule_config",
            anon_subject="bidsconfig",
            dataset=target,
        )

def run_cmd(cmd):
    """Run *cmd* and raise if it exits with a non-zero return code.

    Args:
        cmd: Argument list passed to ``subprocess.run``.

    Raises:
        Exception: If the command's return code is non-zero. The message
            contains the command's stderr; any captured stdout is printed
            before raising.
    """
    # pylint: disable=subprocess-run-check
    proc = subprocess.run(cmd, capture_output=True)
    if not proc.returncode:
        return

    # Decode leniently: output that is not valid UTF-8 must not replace the
    # real failure with a UnicodeDecodeError.
    if proc.stdout:
        print(proc.stdout.decode("utf-8", errors="replace"))
    raise Exception("Command failed with error {}"
                    .format(proc.stderr.decode("utf-8", errors="replace")))

psychoinformatics-de / datalad-hirni

Datasets interfere with each other when using pytest #198