ray-project / ray_lightning

Pytorch Lightning Distributed Accelerators using Ray
Apache License 2.0
211 stars 34 forks source link

"Trials did not complete" error #257

Open Bk073 opened 1 year ago

Bk073 commented 1 year ago

I want to do distributed hyperparameter tuning, but I am getting a "trials did not complete" error.

import lightning as pl
from ray import air, tune
from ray_lightning import RayStrategy
from ray_lightning.tune import TuneReportCallback, get_tune_resources

def main(config):
    """Train the Lightning model for one Ray Tune trial.

    Args:
        config: Hyperparameter dict for this trial (e.g. ``{"lr": ...}``);
            passed unchanged to ``LighteningModel``.

    Relies on ``LighteningModel``, ``train_dataloader`` and
    ``val_dataloader`` being defined elsewhere in the script.
    """
    model = LighteningModel(config)

    # Report the Lightning metric "val_total_loss" to Tune under the name
    # "val_loss" each time validation ends.
    callback = TuneReportCallback(
        {
            "val_loss": "val_total_loss",
        },
        on="validation_end")

    trainer = pl.Trainer(
        max_epochs=4,
        # Fixed: the original passed the undefined name `callbacks`, which
        # raises NameError inside the trial and makes the trial fail.
        callbacks=[callback],
        strategy=RayStrategy(num_workers=1, use_gpu=False))
    trainer.fit(model, train_dataloader, val_dataloader)

def train():
    """Run the Ray Tune search over the learning rate.

    Returns:
        The object returned by ``tuner.fit()`` so callers can inspect the
        trial results (and any per-trial errors).
    """
    search_space = {
        "lr": tune.choice([1e-2, 1e-3, 1e-4]),
    }
    num_samples = 1

    tuner = tune.Tuner(
        tune.with_resources(
            main,
            # Same worker/GPU settings as the RayStrategy created in `main`.
            get_tune_resources(num_workers=1, use_gpu=False),
        ),
        # The original assigned `num_samples` but never passed it to the
        # tuner; wire it through so changing the value takes effect.
        tune_config=tune.TuneConfig(num_samples=num_samples),
        param_space=search_space,
    )
    results = tuner.fit()
    return results

I am following these two links.