Open fbagirov opened 7 years ago
I am trying to normalize a dataframe with Job Titles, as in Example 12 at this link.
The cluster_contacts_by_title function definition goes through without errors; however, when I pass an argument to it:
clustered_contacts = cluster_contacts_by_title(df_titles)
print(clustered_contacts)
I get the following error:
KeyError Traceback (most recent call last)
//anaconda/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
1944 try:
-> 1945 return self._engine.get_loc(key)
1946 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)()
KeyError: 0
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-6b22ef74c260> in <module>()
----> 1 clustered_contacts = cluster_contacts_by_title(df_titles)
2 print(clustered_contacts)
<ipython-input-44-e662224d333b> in cluster_contacts_by_title(df_data)
216 all_titles = []
217 for i, _ in enumerate(df_data):
--> 218 if df_data[i]['Job Title'] == '':
219 df_data[i]['Job Titles'] = ['']
220 continue
//anaconda/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
1995 return self._getitem_multilevel(key)
1996 else:
-> 1997 return self._getitem_column(key)
1998
1999 def _getitem_column(self, key):
//anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2002 # get column
2003 if self.columns.is_unique:
-> 2004 return self._get_item_cache(key)
2005
2006 # duplicate columns & possible reduce dimensionality
//anaconda/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1348 res = cache.get(item)
1349 if res is None:
-> 1350 values = self._data.get(item)
1351 res = self._box_item_values(item, values)
1352 cache[item] = res
//anaconda/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3288
3289 if not isnull(item):
-> 3290 loc = self.items.get_loc(item)
3291 else:
3292 indexer = np.arange(len(self.items))[isnull(self.items)]
//anaconda/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
1945 return self._engine.get_loc(key)
1946 except KeyError:
-> 1947 return self._engine.get_loc(self._maybe_cast_indexer(key))
1948
1949 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)()
KeyError: 0
It seems there is an issue with line 218:
217 for i, _ in enumerate(df_data):
--> 218 if df_data[i]['Job Title'] == '':
219 df_data[i]['Job Titles'] = ['']
220 continue
This is the code from Example 12:
for i, _ in enumerate(contacts):
if contacts[i]['Job Title'] == '':
contacts[i]['Job Titles'] = ['']
continue
What could be the issue? Thanks!
removed