Azure / azure-cli

Azure Command-Line Interface
MIT License
3.94k stars 2.92k forks source link

IsADirectoryError: [Errno 21] Is a directory when using command to create_or_update a job. #28261

Open xixidegb opened 6 months ago

xixidegb commented 6 months ago

Describe the bug

Hi all. I am trying to train an MMDetection model following the official MMDetection docs. I edited the config files and called the train.py script, which produced this error. The same command works fine when I run it in a terminal.

Related command

from azure.ai.ml import command
from azure.ai.ml import Input

job = command( code='./data/mmdetection/', command='python tools/train.py configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py', environment="azureml:mmdet:1", compute=compute_name, display_name="convNext_mask_rcnn", experiment_name="convNext" )

returned_job = ml_client.create_or_update(job)
print(f"Created job: ,{returned_job}")

Errors

IsADirectoryError: [Errno 21] Is a directory: '/data/mmdetection/configs'

Issue script & Debug output


IsADirectoryError Traceback (most recent call last) Cell In[10], line 1 ----> 1 returned_job = ml_client.create_or_update(job) 2 print(f"Created job: ,{returned_job}")

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_ml_client.py:1063, in MLClient.create_or_update(self, entity, **kwargs) 1047 def create_or_update( 1048 self, 1049 entity: T, 1050 **kwargs, 1051 ) -> T: 1052 """Creates or updates an Azure ML resource. 1053 1054 :param entity: The resource to create or update. (...) 1060 , ~azure.ai.ml.entities.Environment, ~azure.ai.ml.entities.Component, ~azure.ai.ml.entities.Datastore] 1061 """ -> 1063 return _create_or_update(entity, self._operation_container.all_operations, **kwargs)

File /anaconda/envs/azureml_py38/lib/python3.8/functools.py:875, in singledispatch.<locals>.wrapper(*args, **kw) 871 if not args: 872 raise TypeError(f'{funcname} requires at least ' 873 '1 positional argument') --> 875 return dispatch(args[0].__class__)(*args, **kw)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_ml_client.py:1122, in _(entity, operations, **kwargs) 1119 @_create_or_update.register(Job) 1120 def _(entity: Job, operations, **kwargs): 1121 module_logger.debug("Creating or updating job") -> 1122 return operations[AzureMLResourceType.JOB].create_or_update(entity, **kwargs)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/core/tracing/decorator.py:76, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs) 74 span_impl_type = settings.tracing_implementation() 75 if span_impl_type is None: ---> 76 return func(*args, **kwargs) 78 # Merge span is parameter is set, but only if no explicit parent are passed 79 if merge_span and not passed_in_parent:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_telemetry/activity.py:350, in monitor_with_telemetry_mixin.<locals>.monitor.<locals>.wrapper(*args, **kwargs) 348 dimensions = {**parameter_dimensions, **(custom_dimensions or {})} 349 with log_activity(logger, activity_name or f.__name__, activity_type, dimensions) as activityLogger: --> 350 return_value = f(*args, **kwargs) 351 if not parameter_dimensions: 352 # collect from return if no dimensions from parameter 353 activityLogger.activity_info.update(_collect_from_return_value(return_value))

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/operations/_job_operations.py:645, in JobOperations.create_or_update(self, job, description, compute, tags, experiment_name, skip_validation, **kwargs) 642 self._validate(job, raise_on_failure=True) 644 # Create all dependent resources --> 645 self._resolve_arm_id_or_upload_dependencies(job) 646 except (ValidationException, ValidationError) as ex: # pylint: disable=broad-except 647 log_and_raise_error(ex)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/operations/_job_operations.py:1026, in JobOperations._resolve_arm_id_or_upload_dependencies(self, job) 1016 def _resolve_arm_id_or_upload_dependencies(self, job: Job) -> None: 1017 """This method converts name or name:version to ARM id. Or it 1018 registers/uploads nested dependencies. 1019 (...) 1023 :rtype: Job 1024 """ -> 1026 self._resolve_arm_id_or_azureml_id(job, self._orchestrators.get_asset_arm_id) 1028 if isinstance(job, PipelineJob): 1029 # Resolve top-level inputs 1030 self._resolve_job_inputs(self._flatten_group_inputs(job.inputs), job._base_path)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/operations/_job_operations.py:1271, in JobOperations._resolve_arm_id_or_azureml_id(self, job, resolver) 1269 job.compute = self._resolve_compute_id(resolver, job.compute) 1270 elif isinstance(job, Command): -> 1271 job = self._resolve_arm_id_for_command_job(job, resolver) 1272 elif isinstance(job, ImportJob): 1273 job = self._resolve_arm_id_for_import_job(job, resolver)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/operations/_job_operations.py:1318, in JobOperations._resolve_arm_id_for_command_job(self, job, resolver) 1308 raise ValidationException( 1309 message=msg.format(job.code), 1310 target=ErrorTarget.JOB, (...) 1313 error_type=ValidationErrorType.INVALID_VALUE, 1314 ) 1316 if job.code is not None and not is_ARM_id_for_resource(job.code, AzureMLResourceType.CODE): 1317 job.code = resolver( -> 1318 Code(base_path=job._base_path, path=job.code), 1319 azureml_type=AzureMLResourceType.CODE, 1320 ) 1321 job.environment = resolver(job.environment, azureml_type=AzureMLResourceType.ENVIRONMENT) 1322 job.compute = self._resolve_compute_id(resolver, job.compute)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/entities/_assets/_artifacts/code.py:68, in Code.__init__(self, name, version, description, tags, properties, path, ignore_file, **kwargs) 65 if self.path and os.path.isabs(self.path): 66 # Only calculate hash for local files 67 self._ignore_file = get_ignore_file(self.path) if ignore_file is None else ignore_file ---> 68 self._hash_sha256 = get_content_hash(self.path, self._ignore_file)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_utils/_asset_utils.py:339, in get_content_hash(path, ignore_file) 337 actual_path = _resolve_path(Path(path)).as_posix() 338 if os.path.isdir(actual_path): --> 339 return _get_file_list_content_hash(get_upload_files_from_folder(actual_path, ignore_file=ignore_file)) 340 if os.path.isfile(actual_path): 341 return _get_file_list_content_hash([(actual_path, Path(actual_path).name)])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_utils/_asset_utils.py:380, in _get_file_list_content_hash(file_list) 378 _hash.update(str(os.path.getsize(file_path)).encode()) 379 for file_path, file_name in sorted(file_list, key=lambda x: str(x[1]).lower()): --> 380 _hash = _get_file_hash(file_path, _hash) 381 return str(_hash.hexdigest())

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/azure/ai/ml/_utils/_asset_utils.py:238, in _get_file_hash(filename, _hash) 237 def _get_file_hash(filename: Union[str, os.PathLike], _hash: hash_type) -> hash_type: --> 238 with open(str(filename), "rb") as f: 239 for chunk in iter(lambda: f.read(CHUNK_SIZE), b""): 240 _hash.update(chunk) IsADirectoryError: [Errno 21] Is a directory: '/data/mmdetection/configs'

Expected behavior

It should be created and can be displayed in Jobs.

Environment Summary

FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu117-py38-torch201:biweekly.202401.1

USER root
RUN apt-get -y update

COPY requirements.txt .
RUN pip install -r requirements.txt --no-cache-dir

RUN mim install mmcv==2.0.1
RUN mim install mmdet==3.1.0

RUN pip install pydash==6.0.0
RUN pip install urllib3==2.0.7
RUN pip install pyarrow==14.0.1
RUN pip install aiohttp==3.9.1
RUN pip install mmpretrain

Additional context

No response

yonzhan commented 6 months ago

Thank you for opening this issue, we will look into it.

microsoft-github-policy-service[bot] commented 6 months ago

Thanks for the feedback! We are routing this to the appropriate team for follow-up. cc @azureml-github.

SnowRipple commented 3 months ago

@xixidegb Did you manage to run your code on Azure?

SnowRipple commented 3 months ago

@nancy-mejia any luck?