aws-samples / amazon-sagemaker-feature-store-end-to-end-workshop

MIT No Attribution
128 stars 50 forks source link

Cell 16 in m6_nb1 fails when using the latest version of the SageMaker Python SDK (v2.132.0) #41

Open jld23 opened 1 year ago

jld23 commented 1 year ago

Here is the code and traceback from running the cell. I looked at the documentation and couldn't see a clear and obvious fix.

from sagemaker.processing import FeatureStoreOutput
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

step_process = ProcessingStep(  # builds the pipeline step; per the traceback below, this is the call that raises the TypeError
    name="fscw-Automated-DataWrangler-Processing",
    processor=processor,  # NOTE(review): `processor` is not defined in this cell — presumably created in an earlier notebook cell
    inputs=[
        ProcessingInput(input_name='flow', 
                        destination='/opt/ml/processing/flow',
                        source=input_flow,  # NOTE(review): `input_flow` is defined outside this cell; confirm what it points to
                        s3_data_type= 'S3Prefix',
                        s3_input_mode= 'File'
                       )
    ],
    outputs=[
        ProcessingOutput(
            output_name=output_name,  # NOTE(review): `output_name` is defined outside this cell
            app_managed=True, 
            feature_store_output=FeatureStoreOutput(feature_group_name=feature_group_name))  # wraps the externally defined `feature_group_name`
    ],
    job_arguments=[f"--output-config '{json.dumps(output_config)}'"]  # one argument string: flag plus single-quoted JSON of `output_config`; `json` is not imported in this cell
)

Traceback

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[16], line 5
      2 from sagemaker.processing import ProcessingInput, ProcessingOutput
      3 from sagemaker.workflow.steps import ProcessingStep
----> 5 step_process = ProcessingStep(
      6     name="fscw-Automated-DataWrangler-Processing",
      7     processor=processor,
      8     inputs=[
      9         ProcessingInput(input_name='flow', 
     10                         destination='/opt/ml/processing/flow',
     11                         source=input_flow,
     12                         s3_data_type= 'S3Prefix',
     13                         s3_input_mode= 'File'
     14                        )
     15     ],
     16     outputs=[
     17         ProcessingOutput(
     18             output_name=output_name,
     19             app_managed=True, 
     20             feature_store_output=FeatureStoreOutput(feature_group_name=feature_group_name))
     21     ],
     22     job_arguments=[f"--output-config '{json.dumps(output_config)}'"]
     23 )

File /opt/conda/lib/python3.8/site-packages/sagemaker/workflow/steps.py:394, in ProcessingStep.__init__(self, name, processor, inputs, outputs, job_arguments, code, property_files, cache_config, depends_on)
    390 # Examine why run method in sagemaker.processing.Processor mutates the processor instance
    391 # by setting the instance's arguments attribute. Refactor Processor.run, if possible.
    392 self.processor.arguments = job_arguments
--> 394 self._properties = Properties(
    395     path=f"Steps.{name}", shape_name="DescribeProcessingJobResponse"
    396 )
    397 self.cache_config = cache_config

TypeError: __init__() missing 1 required positional argument: 'step_name'