Open surianisha opened 2 years ago
Internal error details:
Traceback (most recent call last): File "/opt/anaconda3/lib/python3.9/site-packages/qtconsole/base_frontend_mixin.py", line 138, in _dispatch handler(msg) File "/opt/anaconda3/lib/python3.9/site-packages/spyder/plugins/ipythonconsole/widgets/debugging.py", line 278, in _handle_input_request return super(DebuggingWidget, self)._handle_input_request(msg) File "/opt/anaconda3/lib/python3.9/site-packages/qtconsole/frontend_widget.py", line 512, in _handle_input_request self._readline(msg['content']['prompt'], callback=callback, password=msg['content']['password']) File "/opt/anaconda3/lib/python3.9/site-packages/qtconsole/console_widget.py", line 2422, in _readline self._show_prompt(prompt, newline=False, separator=False) TypeError: _show_prompt() got an unexpected keyword argument 'separator'
I couldn't find 'csv_example_training.json' in the repo, so I used 'csv_input_with_true_ids.csv' instead. There was no settings file either, so I couldn't use one (that part is commented out in the code shared below). I made sure to use consoleLabel() instead of console_label().
I followed the steps in csv_example.py. Active learning started, but the program then terminated without any error message.
The code is below: ################################################## import os import csv import re import logging import optparse
import dedupe from unidecode import unidecode
def preProcess(column):
    """Clean a single raw field value read from the CSV file.

    The value is transliterated with ``unidecode``, runs of spaces are
    collapsed to one space, newlines are replaced by spaces, surrounding
    whitespace and quote characters are stripped, and the text is
    lower-cased.

    Returns the cleaned string, or ``None`` when nothing is left after
    cleaning.
    """
    column = unidecode(column)
    column = re.sub(' +', ' ', column)
    column = re.sub('\n', ' ', column)
    column = column.strip().strip('"').strip("'").lower().strip()

    # The snippet as pasted stopped here without a return statement, so every
    # call produced None and all fields were lost.
    # NOTE(review): mapping an empty result to None follows the upstream
    # csv_example.py this script was adapted from - confirm that is intended.
    if not column:
        return None
    return column
def readData(filename):
    """Read a CSV file into a dict of cleaned records.

    Every value of every row is passed through ``preProcess``. Records are
    keyed by the integer value of the row's ``Id`` column; a later row with
    the same ``Id`` overwrites an earlier one.

    Returns a dict mapping ``int`` record id to a dict of column name ->
    cleaned value.

    Raises ``KeyError`` if a row has no ``Id`` column and ``ValueError`` if
    the ``Id`` value is not an integer.
    """
    data_d = {}
    # newline='' lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are read correctly.
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            clean_row = {k: preProcess(v) for k, v in row.items()}
            row_id = int(row['Id'])
            data_d[row_id] = clean_row

    # The snippet as pasted never returned the dict it built, so callers
    # received None instead of the records.
    return data_d
example
# Directory of the downloaded dedupe csv_example, and the name of the input
# CSV file (presumably located inside that directory - confirm against where
# the two values are combined).
path = '/Users/asuri/Downloads/dedupe-examples-master/csv_example/'
filename = 'csv_example_messy_input.csv'
#######################################
if name == 'main':
As of 2.0 this method is called console_label(), but in 1.x it was called consoleLabel(); that difference may account for the error. Now updated to consoleLabel().
print('clustering...') clustered_dupes = deduper.partition(data_d, 0.5) print('# duplicate sets', len(clustered_dupes))
cluster_membership = {} for cluster_id, (records, scores) in enumerate(clustered_dupes): for record_id, score in zip(records, scores): cluster_membership[record_id] = { "Cluster ID": cluster_id, "confidence_score": score }
with open(output_file, 'w') as f_output, open(input_file) as f_input: