Lightning-AI / litdata

Transform datasets at scale. Optimize datasets for fast AI model training.
Apache License 2.0
335 stars 39 forks source link

NameError: name 'V1DatasetType' is not defined #144

Closed robmarkcole closed 4 months ago

robmarkcole commented 4 months ago

🐛 Bug

On Lightning.ai, running `import litdata` fails with the error below:

To Reproduce

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 import litdata

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/__init__.py:4
      1 from lightning_utilities.core.imports import RequirementCache
      3 from litdata.__about__ import *  # noqa: F403
----> 4 from litdata.processing.functions import map, optimize, walk
      5 from litdata.streaming.combined import CombinedStreamingDataset
      6 from litdata.streaming.dataloader import StreamingDataLoader

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/processing/functions.py:26
     23 import torch
     25 from litdata.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
---> 26 from litdata.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
     27 from litdata.processing.readers import BaseReader
     28 from litdata.processing.utilities import optimize_dns_context

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/processing/data_processor.py:32
     22 from tqdm.auto import tqdm as _tqdm
     24 from litdata.constants import (
     25     _BOTO3_AVAILABLE,
     26     _DEFAULT_FAST_DEV_RUN_ITEMS,
   (...)
     30     _TORCH_GREATER_EQUAL_2_1_0,
     31 )
---> 32 from litdata.processing.readers import BaseReader, StreamingDataLoaderReader
     33 from litdata.processing.utilities import _create_dataset
     34 from litdata.streaming import Cache

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/processing/readers.py:9
      6 from lightning_utilities.core.imports import RequirementCache
      7 from tqdm import tqdm
----> 9 from litdata.streaming.dataloader import StreamingDataLoader
     11 _PYARROW_AVAILABLE = RequirementCache("pyarrow")
     14 class BaseReader(ABC):

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/streaming/__init__.py:14
      1 # Copyright The Lightning AI team.
      2 # Licensed under the Apache License, Version 2.0 (the "License");
      3 # you may not use this file except in compliance with the License.
   (...)
     11 # See the License for the specific language governing permissions and
     12 # limitations under the License.
---> 14 from litdata.streaming.cache import Cache
     15 from litdata.streaming.combined import CombinedStreamingDataset
     16 from litdata.streaming.dataloader import StreamingDataLoader

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/streaming/cache.py:28
     26 from litdata.streaming.sampler import ChunkedIndex
     27 from litdata.streaming.serializers import Serializer
---> 28 from litdata.streaming.writer import BinaryWriter
     29 from litdata.utilities.env import _DistributedEnv, _WorkerEnv
     30 from litdata.utilities.format import _convert_bytes_to_int

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/streaming/writer.py:25
     22 import torch
     24 from litdata.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
---> 25 from litdata.processing.utilities import get_worker_rank
     26 from litdata.streaming.compression import _COMPRESSORS, Compressor
     27 from litdata.streaming.serializers import Serializer, _get_serializers

File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/litdata/processing/utilities.py:22
     15     from lightning_cloud.openapi.rest import ApiException
     16     from lightning_cloud.rest_client import LightningClient
     19 def _create_dataset(
     20     input_dir: Optional[str],
     21     storage_dir: str,
---> 22     dataset_type: V1DatasetType,
     23     empty: Optional[bool] = None,
     24     size: Optional[int] = None,
     25     num_bytes: Optional[str] = None,
     26     data_format: Optional[Union[str, Tuple[str]]] = None,
     27     compression: Optional[str] = None,
     28     num_chunks: Optional[int] = None,
     29     num_bytes_per_chunk: Optional[List[int]] = None,
     30     name: Optional[str] = None,
     31     version: Optional[int] = None,
     32 ) -> None:
     33     """Create a dataset with metadata information about its source and destination."""
     34     project_id = os.getenv("LIGHTNING_CLOUD_PROJECT_ID", None)

NameError: name 'V1DatasetType' is not defined

Environment

On a lightning.ai Studio

litdata==0.2.2

robmarkcole commented 4 months ago

This appears to be resolved by installing the optional extras: `pip install 'litdata[extras]'`.