GoogleCloudPlatform / training-data-analyst

Labs and demos for courses for GCP Training (http://cloud.google.com/training).
Apache License 2.0
7.85k stars 5.86k forks source link

"Vertex AI: Qwik Start" notebook fails in "Exploratory data analysis (EDA) in BigQuery" #2110

Open kynan opened 1 year ago

kynan commented 1 year ago

Executing this cell

%%bigquery recency

SELECT 
  days_since_last_purchase
FROM 
  `online_retail.online_retail_clv_ml`

fails with the error `AttributeError: '_Plugin' object has no attribute '_stored_ctx'`.

Full backtrace ``` Query is running: 0%| | 0/1 [00:00 ----> 1 get_ipython().run_cell_magic('bigquery', 'recency', '\nSELECT \n days_since_last_purchase\nFROM \n `online_retail.online_retail_clv_ml`\n') /opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell) 2470 with self.builtin_trap: 2471 args = (magic_arg_s, cell) -> 2472 result = fn(*args, **kwargs) 2473 return result 2474 /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/magics/magics.py in _cell_magic(line, query) 711 return 712 --> 713 if not args.verbose: 714 display.clear_output() 715 /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job/query.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object, max_results, geography_as_object) 1700 intermediate shuffle and spilled to disk. 1701 """ -> 1702 return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) 1703 1704 @property /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/table.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object, geography_as_object) 1986 1987 if start is not None: -> 1988 self.start = start 1989 if end is not None: 1990 self.end = end /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/table.py in to_arrow(self, progress_bar_type, bqstorage_client, create_bqstorage_client) 1778 # If changing the signature of this method, make sure to apply the same 1779 # changes to job.QueryJob.to_dataframe() -> 1780 def to_dataframe( 1781 self, 1782 bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/table.py in _to_page_iterable(self, bqstorage_download, tabledata_list_download, bqstorage_client) 1633 1634 This method requires the ``pyarrow`` and -> 1635 ``google-cloud-bigquery-storage`` libraries. 
1636 1637 This method only exposes a subset of the capabilities of the /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/_pandas_helpers.py in _download_table_bqstorage(project_id, table, bqstorage_client, preserve_order, selected_fields, page_to_item, max_queue_size) 753 ) 754 --> 755 756 def download_dataframe_bqstorage( 757 project_id, /opt/conda/lib/python3.7/site-packages/google/cloud/bigquery_storage_v1/services/big_query_read/client.py in create_read_session(self, request, parent, read_session, max_stream_count, retry, timeout, metadata) 624 retry=retry, 625 timeout=timeout, --> 626 metadata=metadata, 627 ) 628 /opt/conda/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py in __call__(self, timeout, retry, *args, **kwargs) 152 kwargs["metadata"] = metadata 153 --> 154 return wrapped_func(*args, **kwargs) 155 156 /opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs) 286 sleep_generator, 287 self._deadline, --> 288 on_error=on_error, 289 ) 290 /opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error) 188 for sleep in sleep_generator: 189 try: --> 190 return target() 191 192 # pylint: disable=broad-except /opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs) 55 callable_.__name__ = callable_.__class__.__name__ 56 ---> 57 58 def _wrap_unary_errors(callable_): 59 """Map errors for Unary-Unary and Stream-Unary gRPC callables.""" /opt/conda/lib/python3.7/site-packages/grpc/_channel.py in __call__(self, request, timeout, metadata, credentials, wait_for_ready, compression) 943 state, operations, deadline, rendezvous = self._prepare( 944 request, timeout, metadata, wait_for_ready, compression) --> 945 if state is None: 946 raise rendezvous # pylint: disable-msg=raising-bad-type 947 else: /opt/conda/lib/python3.7/site-packages/grpc/_channel.py in _blocking(self, 
request, timeout, metadata, credentials, wait_for_ready, compression) 931 compression=None): 932 state, call, = self._blocking(request, timeout, metadata, credentials, --> 933 wait_for_ready, compression) 934 return _end_unary_response_blocking(state, call, True, None) 935 src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc.SegregatedCall.next_event() src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc._next_call_event() src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc._next_call_event() src/python/grpcio/grpc/_cython/_cygrpc/completion_queue.pyx.pxi in grpc._cython.cygrpc._latent_event() src/python/grpcio/grpc/_cython/_cygrpc/completion_queue.pyx.pxi in grpc._cython.cygrpc._internal_latent_event() src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi in grpc._cython.cygrpc._get_metadata() AttributeError: '_Plugin' object has no attribute '_stored_ctx' ```
tm-jc-nacpil commented 1 year ago

I have faced the same error. For this cell, I worked around it by using pandas-gbq to download the relevant table as a pandas DataFrame:

  1. Run `pip install pandas-gbq` in the terminal
  2. Replace the cell command
%%bigquery recency

SELECT 
  days_since_last_purchase
FROM 
  `online_retail.online_retail_clv_ml`

with

recency = pd.read_gbq(
"""
SELECT 
  days_since_last_purchase
FROM 
  `online_retail.online_retail_clv_ml`
"""
)

This let me proceed until the "Create a managed Tabular dataset from your BigQuery data source" section, where I got the same error.

Backtrace --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) /tmp/ipykernel_15579/1145961888.py in ----> 1 tabular_dataset = aiplatform.TabularDataset.create(display_name="online-retail-clv", bq_source=f"{BQ_URI}") /opt/conda/lib/python3.7/site-packages/google/cloud/aiplatform/datasets/tabular_dataset.py in create(cls, display_name, gcs_source, bq_source, project, location, credentials, request_metadata, encryption_spec_key_name, sync) 349 encryption_spec_key_name=encryption_spec_key_name 350 ), --> 351 sync=sync, 352 ) 353 /opt/conda/lib/python3.7/site-packages/google/cloud/aiplatform/base.py in wrapper(*args, **kwargs) 643 if self: 644 self.wait() --> 645 return method(*args, **kwargs) 646 647 # callbacks to call within the Future (in same Thread) /opt/conda/lib/python3.7/site-packages/google/cloud/aiplatform/datasets/dataset.py in _create_and_import(cls, api_client, parent, display_name, metadata_schema_uri, datasource, project, location, credentials, request_metadata, encryption_spec, sync) 300 datasource=datasource, 301 request_metadata=request_metadata, --> 302 encryption_spec=encryption_spec, 303 ) 304 /opt/conda/lib/python3.7/site-packages/google/cloud/aiplatform/datasets/dataset.py in _create(cls, api_client, parent, display_name, metadata_schema_uri, datasource, request_metadata, encryption_spec) 392 393 return api_client.create_dataset( --> 394 parent=parent, dataset=gapic_dataset, metadata=request_metadata 395 ) 396 /opt/conda/lib/python3.7/site-packages/google/cloud/aiplatform_v1/services/dataset_service/client.py in create_dataset(self, request, parent, dataset, retry, timeout, metadata) 499 500 # Send the request. --> 501 response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) 502 503 # Wrap the response in an operation future. 
/opt/conda/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py in __call__(self, timeout, retry, *args, **kwargs) 152 kwargs["metadata"] = metadata 153 --> 154 return wrapped_func(*args, **kwargs) 155 156 /opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs) 55 callable_.__name__ = callable_.__class__.__name__ 56 ---> 57 58 def _wrap_unary_errors(callable_): 59 """Map errors for Unary-Unary and Stream-Unary gRPC callables.""" /opt/conda/lib/python3.7/site-packages/grpc/_channel.py in __call__(self, request, timeout, metadata, credentials, wait_for_ready, compression) 943 state, operations, deadline, rendezvous = self._prepare( 944 request, timeout, metadata, wait_for_ready, compression) --> 945 if state is None: 946 raise rendezvous # pylint: disable-msg=raising-bad-type 947 else: /opt/conda/lib/python3.7/site-packages/grpc/_channel.py in _blocking(self, request, timeout, metadata, credentials, wait_for_ready, compression) 931 compression=None): 932 state, call, = self._blocking(request, timeout, metadata, credentials, --> 933 wait_for_ready, compression) 934 return _end_unary_response_blocking(state, call, True, None) 935 src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc.SegregatedCall.next_event() src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc._next_call_event() src/python/grpcio/grpc/_cython/_cygrpc/channel.pyx.pxi in grpc._cython.cygrpc._next_call_event() src/python/grpcio/grpc/_cython/_cygrpc/completion_queue.pyx.pxi in grpc._cython.cygrpc._latent_event() src/python/grpcio/grpc/_cython/_cygrpc/completion_queue.pyx.pxi in grpc._cython.cygrpc._internal_latent_event() src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi in grpc._cython.cygrpc._get_metadata() AttributeError: '_Plugin' object has no attribute '_stored_ctx'