metinsenturk / flat_table

An extension to json_normalize() in pandas
https://pypi.org/project/flat-table/
MIT License
27 stars 9 forks source link

ValueError: cannot reindex from a duplicate axis #7

Open bjornjorgensen opened 4 years ago

bjornjorgensen commented 4 years ago

/home/bjorn/.local/lib/python3.8/site-packages/flat_table/_norm.py:92: FutureWarning: The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead
  typ = type(pd.np.nan).__name__
/home/bjorn/.local/lib/python3.8/site-packages/flat_table/_norm.py:53: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead
  df_withvalues = pd.io.json.json_normalize(ds_withvalues)


ValueError Traceback (most recent call last)

in ----> 1 flat_table.normalize(df) ~/.local/lib/python3.8/site-packages/flat_table/_norm.py in normalize(df, expand_dicts, expand_lists, is_mapper) 195 for parent in dataframe.parent.unique(): 196 group = dataframe[dataframe.parent.isin([parent])] --> 197 df_group = pd.concat([i for i in group.obj], axis=1) 198 dfs.append(df_group) 199 logger.info('{:40} before: {:7} after: {:7} obj: {:7} col: {}'.format( ~/.local/lib/python3.8/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy) 285 ) 286 --> 287 return op.get_result() 288 289 ~/.local/lib/python3.8/site-packages/pandas/core/reshape/concat.py in get_result(self) 478 479 index, columns = self.new_axes --> 480 df = cons(data, index=index) 481 df.columns = columns 482 return df.__finalize__(self, method="concat") ~/.local/lib/python3.8/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy) 465 466 elif isinstance(data, dict): --> 467 mgr = init_dict(data, index, columns, dtype=dtype) 468 elif isinstance(data, ma.MaskedArray): 469 import numpy.ma.mrecords as mrecords ~/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype) 281 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays 282 ] --> 283 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) 284 285 ~/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity) 81 82 # don't force copy because getting jammed in an ndarray anyway ---> 83 arrays = _homogenize(arrays, index, dtype) 84 85 columns = ensure_index(columns) ~/.local/lib/python3.8/site-packages/pandas/core/internals/construction.py in _homogenize(data, index, dtype) 338 # Forces alignment. 
No need to copy data since we 339 # are putting it into an ndarray later --> 340 val = val.reindex(index, copy=False) 341 else: 342 if isinstance(val, dict): ~/.local/lib/python3.8/site-packages/pandas/core/series.py in reindex(self, index, **kwargs) 4397 ) 4398 def reindex(self, index=None, **kwargs): -> 4399 return super().reindex(index=index, **kwargs) 4400 4401 def drop( ~/.local/lib/python3.8/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs) 4450 4451 # perform the reindex on the axes -> 4452 return self._reindex_axes( 4453 axes, level, limit, tolerance, method, fill_value, copy 4454 ).__finalize__(self, method="reindex") ~/.local/lib/python3.8/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy) 4470 4471 axis = self._get_axis_number(a) -> 4472 obj = obj._reindex_with_indexers( 4473 {axis: [new_index, indexer]}, 4474 fill_value=fill_value, ~/.local/lib/python3.8/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups) 4513 4514 # TODO: speed up on homogeneous DataFrame objects -> 4515 new_data = new_data.reindex_indexer( 4516 index, 4517 indexer, ~/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy) 1241 # some axes don't allow reindexing with dups 1242 if not allow_dups: -> 1243 self.axes[axis]._can_reindex(indexer) 1244 1245 if axis >= self.ndim: ~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer) 3281 # trying to reindex on an axis with duplicates 3282 if not self.is_unique and len(indexer): -> 3283 raise ValueError("cannot reindex from a duplicate axis") 3284 3285 def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ValueError: cannot reindex from a duplicate axis
metinsenturk commented 4 years ago

This issue is hard to understand as reported. However, I have a feeling that it might be related to your DataFrame rather than to flat_table.

Thanks!

andrescevp commented 4 years ago

I ran into the same issue...

I was trying to get a table from a df that has a column containing this JSON:

{"type":"FeatureCollection","features":[{"bbox":[8.681436,49.41461,8.690123,49.420514],"type":"Feature","properties":{"segments":[{"distance":887.8,"duration":189,"steps":[{"distance":312.6,"duration":75,"type":11,"instruction":"Head north on Wielandtstraße","name":"Wielandtstraße","way_points":[0,10]},{"distance":251.1,"duration":36.2,"type":1,"instruction":"Turn right onto Mönchhofstraße","name":"Mönchhofstraße","way_points":[10,21]},{"distance":212.2,"duration":50.9,"type":2,"instruction":"Turn sharp left onto Keplerstraße","name":"Keplerstraße","way_points":[21,24]},{"distance":109.9,"duration":26.4,"type":1,"instruction":"Turn right onto Moltkestraße","name":"Moltkestraße","way_points":[24,27]},{"distance":2,"duration":0.5,"type":0,"instruction":"Turn left onto Werderplatz","name":"Werderplatz","way_points":[27,28]},{"distance":0,"duration":0,"type":10,"instruction":"Arrive at Werderplatz, on the right","name":"-","way_points":[28,28]}]},{"distance":481.2,"duration":103,"steps":[{"distance":2,"duration":0.5,"type":11,"instruction":"Head south on Werderplatz","name":"Werderplatz","way_points":[28,29]},{"distance":265.5,"duration":63.7,"type":0,"instruction":"Turn left onto Moltkestraße","name":"Moltkestraße","way_points":[29,37]},{"distance":83,"duration":7.5,"type":0,"instruction":"Turn left onto Handschuhsheimer Landstraße, B 3","name":"Handschuhsheimer Landstraße, B 3","way_points":[37,39]},{"distance":130.8,"duration":31.4,"type":0,"instruction":"Turn left onto Roonstraße","name":"Roonstraße","way_points":[39,42]},{"distance":0,"duration":0,"type":10,"instruction":"Arrive at Roonstraße, straight 
ahead","name":"-","way_points":[42,42]}]}],"summary":{"distance":1369,"duration":292},"way_points":[0,28,42]},"geometry":{"coordinates":[[8.681496,49.41461],[8.68149,49.414711],[8.681441,49.415655],[8.681436,49.415747],[8.681455,49.415835],[8.681509,49.416087],[8.681642,49.416498],[8.681671,49.416588],[8.681701,49.416684],[8.681875,49.417287],[8.68189,49.417394],[8.682045,49.41739],[8.682107,49.41739],[8.682461,49.417389],[8.682563,49.417388],[8.682676,49.417387],[8.682782,49.417388],[8.683371,49.417368],[8.683592,49.41736],[8.683763,49.417362],[8.685222,49.417366],[8.685359,49.417364],[8.685342,49.417411],[8.685024,49.419178],[8.68501,49.419258],[8.685156,49.419273],[8.686408,49.419402],[8.68651,49.419413],[8.686506,49.41943],[8.68651,49.419413],[8.686617,49.419425],[8.686983,49.419461],[8.687101,49.419473],[8.687212,49.419486],[8.688301,49.419619],[8.688398,49.41963],[8.690104,49.419828],[8.690123,49.419833],[8.689854,49.420216],[8.689652,49.420514],[8.68963,49.42051],[8.688601,49.420393],[8.687872,49.420318]],"type":"LineString"}}],"bbox":[8.681436,49.41461,8.690123,49.420514],"metadata":{"attribution":"openrouteservice.org | OpenStreetMap contributors","service":"routing","timestamp":1601410558822,"query":{"coordinates":[[8.681495,49.41461],[8.686507,49.41943],[8.687872,49.420318]],"profile":"driving-car","format":"geojson"},"engine":{"version":"6.3.0","build_date":"2020-09-21T01:00:26Z","graph_date":"1970-01-01T00:00:00Z"}}}