Status: Open — crucis opened this issue 3 years ago
The following example does not work in Databricks Runtime 8.4; it fails with `TypeError: unhashable type: 'list'`:
```python
kdf = ks.DataFrame({"ui": ['C', 'D', 'D', 'C'], "foo": ['one', 'one', 'two', 'two'], "bar": ['A', 'A', 'B', 'C'], "ar": [1, 2, 2, 2], "baz": [1, 2, 3, 4]}, columns=['ui', 'foo', 'bar', 'baz', 'ar'])
kdf.pivot(index=['ui', 'foo'], columns='bar', values=['baz', 'ar'])
```
```
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<command-4107535394601473> in <module>
----> 1 df.pivot(index=['ui','foo'] , columns='bar', values=['baz', 'ar'])

/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
    193 start = time.perf_counter()
    194 try:
--> 195 res = func(*args, **kwargs)
    196 logger.log_success(
    197 class_name, function_name, time.perf_counter() - start, signature

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot(self, index, columns, values)
   6274 index = df._internal.column_labels[: self._internal.index_level]
   6275
-> 6276 df = df.pivot_table(index=index, columns=columns, values=values, aggfunc="first")
   6277
   6278 if should_use_existing_index:

/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
    188 if hasattr(_local, "logging") and _local.logging:
    189 # no need to log since this should be internal call.
--> 190 return func(*args, **kwargs)
    191 _local.logging = True
    192 try:

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value)
   6048 index = [label if is_name_like_tuple(label) else (label,) for label in index]
   6049 sdf = (
-> 6050 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
   6051 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
   6052 .agg(*agg_cols)

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in <listcomp>(.0)
   6048 index = [label if is_name_like_tuple(label) else (label,) for label in index]
   6049 sdf = (
-> 6050 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
   6051 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
   6052 .agg(*agg_cols)

/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_name_for(self, label_or_scol)
    813 scol = label_or_scol
    814 else:
--> 815 scol = self.spark_column_for(label_or_scol)
    816 return self.spark_frame.select(scol).columns[0]
    817

/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_for(self, label)
    803 """ Return Spark Column for the given column label. """
    804 column_labels_to_scol = dict(zip(self.column_labels, self.data_spark_columns))
--> 805 if label in column_labels_to_scol:
    806 return column_labels_to_scol[label]
    807 else:

TypeError: unhashable type: 'list'
```
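As a workaround, I am using `pivot_table` with `aggfunc='first'`:
```python
kdf.pivot_table(index=['ui','foo'] , columns='bar', values=['baz', 'ar'], aggfunc='first')
```
to solve my problem, but I think that `pivot` itself should work with a MultiIndex (i.e., accept a list of columns as `index`).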
pivot
The following example does not work in Databricks Runtime 8.4:
I am using
to solve my problem, but I think that
pivot
should work with Multiindex.