Open Saurabhdhoble-8451 opened 1 year ago
Hi team, I am following the code example given on this page: https://learn.microsoft.com/en-us/azure/open-datasets/dataset-taxi-yellow?tabs=azureml-opendatasets

I am running this code in an Azure Databricks environment.
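For context, this is essentially the snippet from that page as I am running it (reconstructed from the traceback, which only shows lines 8–10 of the cell, so the exact `end_date` value is an assumption):

```python
# Reconstructed repro, per the docs sample and the traceback below.
from azureml.opendatasets import NycTlcYellow
from dateutil import parser

end_date = parser.parse('2018-06-06 00:00:00')    # assumed; defined above line 8
start_date = parser.parse('2018-05-01 00:00:00')  # line 8 of the failing cell
nyc_tlc = NycTlcYellow(start_date, end_date)      # line 9
nyc_tlc_df = nyc_tlc.to_spark_dataframe()         # line 10 -- raises TypeError
```

Calling `to_spark_dataframe()` then fails with the error below: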
```
TypeError                                 Traceback (most recent call last)
File <command>:10
      8 start_date = parser.parse('2018-05-01 00:00:00')
      9 nyc_tlc = NycTlcYellow(start_date, end_date)
---> 10 nyc_tlc_df = nyc_tlc.to_spark_dataframe()

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/accessories/_loggerfactory.py:139, in track.<locals>.monitor.<locals>.wrapper(*args, **kwargs)
    137 with _LoggerFactory.track_activity(logger, func.__name__, activity_type, custom_dimensions) as al:
    138     try:
--> 139         return func(*args, **kwargs)
    140     except Exception as e:
    141         al.activity_info['error_message'] = str(e)

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/accessories/open_dataset_base.py:164, in OpenDatasetBase.to_spark_dataframe(self)
    161 self._log_properties['ActivityType'] = ActivityType.PUBLICAPI
    162 _LoggerFactory.log_event(
    163     'to_spark_dataframe_in_worker', **self._log_properties)
--> 164 return self._to_spark_dataframe()

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/accessories/open_dataset_base.py:305, in OpenDatasetBase._to_spark_dataframe(self)
    299 def _to_spark_dataframe(self):
    300     """
    301     To SPARK dataframe, internal to override.
    302
    303     :return: SPARK dataframe.
    304     """
--> 305     return self._blob_accessor.get_spark_dataframe(
    306         self.cols,
    307         **self._kwargs
    308     )

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/dataaccess/_blob_accessor.py:303, in BlobAccessor.get_spark_dataframe(self, cols, **kwargs)
    301 target_paths = targets_dataframe.Path
    302 wasab_format = "wasbs://%s@%s.blob.core.windows.net/%s"
--> 303 paths = [wasab_format % (self._blob_container_name, self._blob_account_name,
    304                          self._get_relative_path(path)) for path in target_paths]
    305 spark = SparkSession.builder.getOrCreate()
    306 df = spark.read \
    307     .option("basePath", self.get_data_wasbs_path()) \
    308     .parquet(*paths)

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/dataaccess/_blob_accessor.py:304, in <listcomp>(.0)
    301 target_paths = targets_dataframe.Path
    302 wasab_format = "wasbs://%s@%s.blob.core.windows.net/%s"
    303 paths = [wasab_format % (self._blob_container_name, self._blob_account_name,
--> 304                          self._get_relative_path(path)) for path in target_paths]
    305 spark = SparkSession.builder.getOrCreate()
    306 df = spark.read \
    307     .option("basePath", self.get_data_wasbs_path()) \
    308     .parquet(*paths)

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-32d2f174-0835-4820-a9a8-7dd8d8d9e3ac/lib/python3.9/site-packages/azureml/opendatasets/dataaccess/_blob_accessor.py:467, in BlobAccessor._get_relative_path(self, url)
    466 def _get_relative_path(self, url: str) -> str:
--> 467     if "blob.core.windows.net" in url:
    468         return url.replace(self._get_base_url(), "")
    469     else:

TypeError: argument of type 'StreamInfo' is not iterable
```
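From the last frame, `BlobAccessor._get_relative_path` is being handed a `StreamInfo` object where it expects a URL string, so the `"blob.core.windows.net" in url` containment check raises. In the meantime I can unblock myself by reading the parquet files directly from the public blob container, as shown on the same docs page, which bypasses the `azureml-opendatasets` path handling entirely. A minimal sketch of that approach (account, container, and path names are the ones published on the page; the pickup-time column name is my assumption based on the dataset schema):

```python
# Read NYC yellow taxi data straight from Azure Open Datasets public storage,
# following the azure-storage/pyspark variant of the same docs page.
# `spark` is the SparkSession that Databricks provides in every notebook.
blob_account_name = "azureopendatastorage"
blob_container_name = "nyctlc"
blob_relative_path = "yellow"
blob_sas_token = ""  # the container is public, so an empty SAS token works

wasbs_path = "wasbs://%s@%s.blob.core.windows.net/%s" % (
    blob_container_name, blob_account_name, blob_relative_path)
spark.conf.set(
    "fs.azure.sas.%s.%s.blob.core.windows.net" % (blob_container_name, blob_account_name),
    blob_sas_token)

df = spark.read.parquet(wasbs_path)
# Restrict to the same window the failing NycTlcYellow call asked for.
# Column name assumed from the dataset schema on the docs page.
nyc_tlc_df = df.filter((df.tpepPickupDateTime >= "2018-05-01") &
                       (df.tpepPickupDateTime < "2018-06-06"))
display(nyc_tlc_df.limit(10))
```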
@Saurabhdhoble-8451 Thanks for your feedback! We will investigate and update as appropriate.