Open SteveHong1901 opened 1 week ago
import pandas as pd
from fuzzywuzzy import fuzz, process


def create_standardized_mapping(unique_names, accurate_names, threshold=90):
    """Map each raw name to its closest canonical name via fuzzy matching.

    For every entry of ``unique_names`` the best candidate from
    ``accurate_names`` is looked up with ``process.extractOne`` using the
    ``fuzz.token_sort_ratio`` scorer. The candidate is accepted only when
    its score is at least ``threshold``.

    Args:
        unique_names: Iterable of names to standardize.
        accurate_names: Collection of canonical names to match against.
        threshold: Minimum score (inclusive) for a candidate to be accepted.
            Defaults to 90.

    Returns:
        A list with one dict per input name, in input order, with the keys
        ``'original_name'``, ``'matched_name'`` and ``'score'``. When no
        candidate reaches the threshold (or no candidate is returned at
        all), ``'matched_name'`` is the original name itself and
        ``'score'`` is 0 -- the score of the rejected candidate is not
        reported.
    """
    standardized_mapping = []
    for name in unique_names:
        # extractOne yields a falsy value when it has no candidate to offer;
        # otherwise index 0 is the matched choice and index 1 its score.
        # Indexing (rather than tuple-unpacking) keeps this working if the
        # result carries extra trailing items.
        best_match = process.extractOne(
            name, accurate_names, scorer=fuzz.token_sort_ratio
        )
        if best_match and best_match[1] >= threshold:
            standardized_mapping.append({
                'original_name': name,
                'matched_name': best_match[0],
                'score': best_match[1],
            })
        else:
            # No acceptable match: keep the name unchanged and flag it
            # with a score of 0.
            standardized_mapping.append({
                'original_name': name,
                'matched_name': name,
                'score': 0,
            })
    return standardized_mapping


# Example usage
unique_names = ["Jon", "Johann", "Jane"]
accurate_names = ["John", "Johnny", "Jane"]
threshold = 80
standardized_mapping = create_standardized_mapping(unique_names, accurate_names, threshold)

# Display results
df = pd.DataFrame(standardized_mapping)
print(df)