Open ubh0927 opened 11 months ago
import pandas as pd
# De-duplicate a CSV on its 'F' column: rows repeating an earlier 'F' value are
# dropped, and the surviving rows are saved to a new file without the index.
danmu_df = pd.read_csv(filepath_or_buffer='path_to_your_file.csv')
danmu_df_unique = danmu_df.drop_duplicates(subset='F', keep='first')
danmu_df_unique.to_csv(path_or_buf='path_to_your_new_file.csv', index=False)
# Load the CSV. pandas decodes as UTF-8 by default; if the file is not valid
# UTF-8, retry the read as GBK.
try:
    danmu_df = pd.read_csv('/mnt/data/纪录片弹幕.csv')
except UnicodeDecodeError:
    danmu_df = pd.read_csv('/mnt/data/纪录片弹幕.csv', encoding='gbk')

# Keep only the first row for each distinct value in column 'F'.
# NOTE(review): this requires a header literally named 'F'. If "F" was meant as
# the sixth spreadsheet column, select it by position instead — confirm.
danmu_df_unique = danmu_df.drop_duplicates(subset=['F'])

# Write the de-duplicated rows next to the input, without the index column.
output_path = '/mnt/data/纪录片弹幕_no_duplicates.csv'
danmu_df_unique.to_csv(output_path, index=False)
output_path
纪录片弹幕.csv:分析 F 列的中文,并删除重复的文字。