Open JiahaoYao opened 2 years ago
def tune_test(dir, strategy):
    """Run a two-sample Tune sweep and check iterations against max_epochs.

    Args:
        dir: Passed through unchanged as the first argument of
            ``train_func``.
        strategy: Object exposing ``num_workers`` and ``use_gpu``; it is
            forwarded to ``train_func`` and used to size the per-trial
            resources.

    Raises:
        AssertionError: If any row of ``analysis.results_df`` has a
            ``training_iteration`` different from its ``config.max_epochs``.
    """
    # Report to Tune at the end of every validation run.
    report_callbacks = [TuneReportCallback(on="validation_end")]

    # NOTE(review): train_func presumably returns the trainable that Tune
    # invokes with each sampled config -- confirm against its definition.
    trainable = train_func(dir, strategy, callbacks=report_callbacks)
    search_space = {"max_epochs": tune.choice([1, 2, 3])}
    trial_resources = get_tune_resources(
        num_workers=strategy.num_workers,
        use_gpu=strategy.use_gpu,
    )

    analysis = tune.run(
        trainable,
        config=search_space,
        resources_per_trial=trial_resources,
        num_samples=2,
    )

    # Every trial must have reported exactly as many iterations as the
    # number of epochs it was configured with.
    reported_iterations = analysis.results_df["training_iteration"]
    configured_epochs = analysis.results_df["config.max_epochs"]
    assert all(reported_iterations == configured_epochs)
def test_tune_iteration_ddp():
    """Check the Tune iteration count for a two-worker, GPU-enabled RayStrategy.

    Delegates to ``tune_test`` using the current directory ('./') as the
    working directory argument.
    """
    tune_test('./', RayStrategy(num_workers=2, use_gpu=True))
This is the code to reproduce the error.
It looks like a GPU id issue: the device cannot be assigned with `torch.cuda.set_device`,
which gives:
CUDA error: invalid device ordinal