oracle / ocifs

ocifs provides a POSIX-compatible API wrapping Oracle Cloud Infrastructure's (OCI) Object Storage. ocifs is a Python library that relies on the fsspec framework.
https://ocifs.readthedocs.io/en/latest/
Universal Permissive License v1.0
17 stars 9 forks source link

fsspec.put is broken for uploading local files #38

Open Skylion007 opened 7 months ago

Skylion007 commented 7 months ago
fs.put('/dbfs/aaron/release/CVPR-2024-release/yfcc100m-lowres-HF-CVPR-1000max/licensename=Attribution License/least_dim_range=256-512/photoid_last_digit=0.0/_SUCCESS', 'oci://mosaicml-internal-dataset-yfcc100m@redacted/yfcc100m/cvpr2024-release/commoncatalog-lowres-HF-CVPR-1000max/v0/licensename-Attribution_License/least_dim_range-256-512/photoid_last_digit-0.0/_SUCCESS')

fails to upload to an oci bucket, yet the following works:

fs.touch('oci://mosaicml-internal-dataset-yfcc100m@redacted/yfcc100m/cvpr2024-release/commoncatalog-lowres-HF-CVPR-1000max/v0/licensename-Attribution_License/least_dim_range-256-512/photoid_last_digit-0.0/_SUCCESS')

fs.put is improperly resolving the local file path as an OCI bucket path and raising an error when it cannot determine the bucket name. I am not sure whether this is because the local file path contains spaces or for some other reason.

The full stack trace implies that it is the left path (lpath) that it is struggling with, which is absurd, as that is a local file path:

databricks/python/lib/python3.11/site-packages/fsspec/spec.py:958, in AbstractFileSystem.put(self, lpath, rpath, recursive, callback, **kwargs)
    955 lpaths = fs.expand_path(lpath, recursive=recursive)
    956 if source_is_str and not recursive:
    957     # Non-recursive glob does not copy directories
--> 958     lpaths = [p for p in lpaths if not (trailing_sep(p) or self.isdir(p))]
    959     if not lpaths:
    960         return
File /databricks/python/lib/python3.11/site-packages/fsspec/spec.py:958, in <listcomp>(.0)
    955 lpaths = fs.expand_path(lpath, recursive=recursive)
    956 if source_is_str and not recursive:
    957     # Non-recursive glob does not copy directories
--> 958     lpaths = [p for p in lpaths if not (trailing_sep(p) or self.isdir(p))]
    959     if not lpaths:
    960         return
File /databricks/python/lib/python3.11/site-packages/fsspec/spec.py:674, in AbstractFileSystem.isdir(self, path)
    672 """Is this entry directory-like?"""
    673 try:
--> 674     return self.info(path)["type"] == "directory"
    675 except IOError:
    676     return False
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-050b4490-65b9-4b05-bd95-6d78cdebdf1a/lib/python3.11/site-packages/ocifs/core.py:793, in OCIFileSystem.info(self, path, **kwargs)
    791 if key:
    792     try:
--> 793         obj_data = self._call_oci(
    794             self.oci_client.head_object,
    795             namespace_name=namespace,
    796             bucket_name=bucket,
    797             object_name=key,
    798             **kwargs,
    799         ).headers
    800     except ServiceError as e:
    801         if e.status == 404:
    802             # Check for subdirectories
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-050b4490-65b9-4b05-bd95-6d78cdebdf1a/lib/python3.11/site-packages/ocifs/core.py:230, in OCIFileSystem._call_oci(self, method, is_detail_method, *akwarglist, **kwargs)
    228     self.connect(refresh=True)
    229     return method(**additional_kwargs)
--> 230 raise e
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-050b4490-65b9-4b05-bd95-6d78cdebdf1a/lib/python3.11/site-packages/ocifs/core.py:225, in OCIFileSystem._call_oci(self, method, is_detail_method, *akwarglist, **kwargs)
    223 logger.debug("CALL: %s - %s" % (method.__name__, additional_kwargs))
    224 try:
--> 225     return method(**additional_kwargs)
    226 except Exception as e:
    227     if str(getattr(e, "code", "")) in ["401", "402", "403"]:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-050b4490-65b9-4b05-bd95-6d78cdebdf1a/lib/python3.11/site-packages/ocifs/data_lake/lake_sharing_object_storage_client.py:1143, in LakeSharingObjectStorageClient.head_object(self, namespace_name, bucket_name, object_name, **kwargs)
   1130         return self.base_client.call_api(
   1131             resource_path=resource_path,
   1132             method=method,
   (...)
   1139             required_arguments=required_arguments,
   1140         )
   1142 else:
-> 1143     return ObjectStorageClient.head_object(
   1144         self, namespace_name, bucket_name, object_name, **kwargs
   1145     )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-050b4490-65b9-4b05-bd95-6d78cdebdf1a/lib/python3.11/site-packages/oci/object_storage/object_storage_client.py:3187, in ObjectStorageClient.head_object(self, namespace_name, bucket_name, object_name, **kwargs)
   3185 for (k, v) in six.iteritems(path_params):
   3186     if v is None or (isinstance(v, six.string_types) and len(v.strip()) == 0):
-> 3187         raise ValueError(f'Parameter {k} cannot be None, whitespace or empty string')
   3189 query_params = {
   3190     "versionId": kwargs.get("version_id", missing)
   3191 }
   3192 query_params = {k: v for (k, v) in six.iteritems(query_params) if v is not missing and v is not None}