Open TeorinKim opened 2 years ago
import pandas as pd import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
sparql = SPARQLWrapper("http://data.visitkorea.or.kr/sparql") def getresult1(r2): prefix = """ PREFIX skos: http://www.w3.org/2004/02/skos/core# PREFIX rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# PREFIX rdfs: http://www.w3.org/2000/01/rdf-schema# PREFIX dc: http://purl.org/dc/elements/1.1/ PREFIX owl: http://www.w3.org/2002/07/owl# PREFIX xsd: http://www.w3.org/2001/XMLSchema# PREFIX vi: http://www.saltlux.com/transformer/views# PREFIX kto: http://data.visitkorea.or.kr/ontology/ PREFIX ktop: http://data.visitkorea.or.kr/property/ PREFIX ids: http://data.visitkorea.or.kr/resource/ PREFIX wgs: http://www.w3.org/2003/01/geo/wgs84_pos# PREFIX foaf: http://xmlns.com/foaf/0.1/ PREFIX geo: http://www.saltlux.com/geo/property# PREFIX pf: http://www.saltlux.com/DARQ/property# """
select_state = """ SELECT ?label ?location ?mapx ?mapy ?address ?cat ?subcat ?category ?descript WHERE { ?id rdfs:label ?label ; ktop:location ?location ; wgs:long ?mapx ; wgs:lat ?mapy ; ktop:address ?address ; ktop:category ?category ; dc:description ?descript. ?location skos:broader ?upper_loc . ?upper_loc skos:prefLabel ?upper_name . FILTER langMatches( lang(?upper_name), "KO") . FILTER (contains( ?upper_name, '""" + r2 + """')) . ?upper_loc skos:broader ?top_loc . ?top_loc skos:prefLabel ?top_name . FILTER langMatches (lang(?top_name), "KO" ) . FILTER (contains( ?top_name, '""" + '제주' + """')) . ?category skos:broader ?upper_category . ?category rdfs:label ?subcat. ?upper_category skos:broader ?top_category . ?top_category skos:prefLabel ?cat . FILTER langMatches( lang(?cat), "KO" ) . } """ sparql.setQuery(prefix + select_state) sparql.setReturnFormat(JSON) result = sparql.query().convert() return result
result1 = getresult1('제주') result2 = getresult1('서귀포')
len(result2['results']['bindings'])
result2
tourapi1=[] data = result1['results']['bindings']
for i in range(len(data)): temp=[] templabel = data[i]['label']['value'] x=float(data[i]['mapx']['value']) y=float(data[i]['mapy']['value']) temp.append(templabel) temp.append(data[i]['cat']['value']) temp.append(data[i]['subcat']['value']) temp.append(y) temp.append(x) temp.append(data[i]['address']['value']) temp.append(data[i]['descript']['value']) check = str(templabel) + " " + str(x) + " " + str(y) temp.append(check) tourapi1.append(temp)
tourapi2=[] data = result2['results']['bindings']
api_base = tourapi1 + tourapi2
len(api_base)
api_base
col=['name','cat1','cat3','mapy','mapx','address','description' , 'gps_key']
apibase=pd.DataFrame(api_base,columns=col)
apibase
apibase.to_excel('D:/ML/prepost/apibase_0910.xlsx', index=False)
pp_raw=pd.read_excel('D:/ML/prepost/에디트립여행정보데이터.xlsx')
pp_raw
pp_raw_list = pp_raw.values.tolist()
pp_raw_list
key_pp_raw = [] key1_pp = [] key2_pp = []
for i in range(len(pp_raw)): name = pp_raw['지점명'][i] name = name.replace(" ","") address = pp_raw['신주소'][i] y = float(pp_raw['좌표(lat)'][i]) x = float(pp_raw['좌표(lon)'][i]) if address.find("제주특별자치도 ") == -1: address2 = "제주특별자치도 " + address else: address2 = address key1 = name + " " + str(round(y,8)) + " " + str(round(x,8)) key2 = name + " " + address2 key1_pp.append(key1) key2_pp.append(key2) temp = [key1, key2] key_pp_raw.append(temp)
pp_raw['key_add'] = key2_pp pp_raw['key_gps'] = key1_pp
matched_pp = [] matched_pp_time = [] matched_pp_closed = [] matched_pp_park = [] matched_pp_descripion = [] m_check_pp = []
for gps, add in key_pp_raw: timetemp = "" closedtemp = "" parktemp = "" descriptiontemp = "" matching_check = "not matched"
for name, cat, address, mapy, mapx, time, closed, park, description, address_key, gps_key in api_add: if gps==gps_key: timetemp = time closedtemp = closed parktemp = park descriptiontemp = description matching_check = "matched" elif add==address_key: timetemp = time closedtemp = closed parktemp = park descriptiontemp = description matching_check = "matched" temp = [gps, add, timetemp, closedtemp, parktemp, descriptiontemp] m_check_pp.append(matching_check) matched_pp.append(temp)
m_check_pp
matched_api = [] m_check_api = []
for name, cat, address, mapy, mapx, time, closed, park, description, address_key, gps_key in api_add:
timetemp = "" closedtemp = "" parktemp = "" descriptiontemp = "" matching_check = "not matched" for gps, add in key_pp_raw: if gps==gps_key: timetemp = time closedtemp = closed parktemp = park descriptiontemp = description matching_check = "matched" elif add==address_key: timetemp = time closedtemp = closed parktemp = park descriptiontemp = description matching_check = "matched" temp = [gps_key, address_key, timetemp, closedtemp, parktemp, descriptiontemp] m_check_api.append(matching_check) matched_api.append(temp)
m_check_api
apibase['m_check']=m_check_api
pp_raw['m_check']=m_check_pp
api_m = apibase[apibase['m_check']=='not matched']
api_m
##########################
kc_493=pd.read_csv('D:/ML/prepost/KC_493_DMSTC_MCST_CAFE_DSRT_2020.csv') kc_495=pd.read_csv('D:/ML/prepost/KC_495_LLR_ATRCTN_2020.csv') kc_505=pd.read_csv('D:/ML/prepost/KC_505_DMSTC_MCST_RSTRT_2020.csv') kc_507=pd.read_csv('D:/ML/prepost/KC_507_LLR_HISTST_2020.csv')
kc_493.insert(0, "구분", "kc_493") kc_495.insert(0, "구분", "kc_495") kc_505.insert(0, "구분", "kc_505") kc_507.insert(0, "구분", "kc_507")
df = pd.concat([kc_493, kc_495, kc_505, kc_507])
df2 = df[df['sido_nm']=='제주특별자치도']
df2
name_map = {label:idx for idx, label in enumerate(np.unique(list(df2['id_poi'])))} len(name_map)
num=[]
for i in range(len(df2)): num.append(i)
bc_data = df2
bc_data = bc_data.loc[:,['구분','poi_nm', 'sido_nm', 'sgg_nm','bemd_nm','ri_nm', 'beonji','rd_nm','bld_num','y','x','mcate_nm']]
bc_data
bc_list = bc_data.values.tolist()
col2 = ['구분','poi_nm', 'sido_nm', 'sgg_nm','bemd_nm','ri_nm', 'beonji','rd_nm','bld_num','y','x','mcate_nm']
bc_data2 = pd.DataFrame(bc_list, columns = col2)
bc_data3 = bc_data2.fillna("")
bc_data4 = bc_data3.loc[:, ['poi_nm', 'y', 'x']]
key_gps = []
for i in range(len(bc_data4)): name = bc_data4['poi_nm'][i] y = round(bc_data4['y'][i],8) x = round(bc_data4['x'][i],8) key = name+ " " + str(y) + " " + str(x) key_gps.append(key)
bc_data3['old_add'] = bc_data3['sido_nm'].str.cat(bc_data3['sgg_nm'], sep=" ")
bc_data3['new_add'] = bc_data3['old_add'].str.cat(bc_data3['rd_nm'], sep=" ")
bc_data3['new_add'] = bc_data3['new_add'].str.cat(bc_data3['bld_num'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['bemd_nm'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['ri_nm'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['beonji'], sep=" ")
bc_data3['key_gps'] = key_gps
bc_data3
dup=bc_data3[bc_data3.duplicated(['key_gps'], keep='first')]
dup
dup2=bc_data3[bc_data3.duplicated(['key_gps'], keep='last')] dup2
bc_data3=bc_data3.drop_duplicates('key_gps')
bc_df = bc_data3.loc[:, ['구분','poi_nm','old_add','new_add','y','x','mcate_nm','key_gps']]
bc_df
bc_df.loc[bc_df.poi_nm== bc_df.mcate_nm, 'mcate_nm'] = np.nan
bc_df.insert(6, "영업시간", np.nan) bc_df.insert(7, "휴무일", np.nan) bc_df.insert(8, "주차가능여부", np.nan) bc_df.insert(9, "평균소요시간", np.nan) bc_df.insert(11, "개요", np.nan) bc_df.insert(12, "key_add", np.nan) bc_df.insert(14, "m_check", np.nan)
col_name = ['구분', '지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']
bc_df.columns = ['구분', '지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']
pp_bc = pd.concat([pp_raw, bc_df])
pp_bc
api_m.insert(0,"구분", "TourAPI") api_m.insert(3,"구주소", np.nan) api_m.insert(10,"소요시간", np.nan)
api_m.columns = ['구분', '지점명', '태그', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '개요', 'key_add', 'key_gps', 'm_check']
api_m=api_m[['구분','지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']] # 프리포스트 순서와 동일하게 컬럼 순서 변경
pp_bc_api = pd.concat([pp_bc, api_m])
listall = pp_bc_api.values.tolist()
df_all = pd.DataFrame(listall, columns = col_name)
df_final = df_all.loc[:, ['구분','지점명', '구주소','신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요']]
df_final
park_map = {label:idx for idx, label in enumerate(np.unique(list(df_final['주차가능여부(1-true, 0-false)'])))} park_map
df_final.insert(9, "park_raw", np.nan)
df_final['park_raw']=df_final['주차가능여부(1-true, 0-false)']
df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "", '주차가능여부(1-true, 0-false)'] = np.nan df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가게앞 3대", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "공용주차장", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음 (약 소형 150대, 대형10대 주차가능)", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음(100대)", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "주차 가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "주차가능", '주차가능여부(1-true, 0-false)'] = 1
park_map2 = {label:idx for idx, label in enumerate(np.unique(list(df_final['주차가능여부(1-true, 0-false)'])))} park_map2
df_final.to_excel('D:/ML/prepost/df_final.xlsx', index=False)
dup=pp_bc_api[pp_bc_api.duplicated(['key_gps'], keep='first')] dup2=pp_bc_api[pp_bc_api.duplicated(['key_gps'], keep='last')]
dup2
import pandas as pd import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
sparql = SPARQLWrapper("http://data.visitkorea.or.kr/sparql") def getresult1(r2): prefix = """ PREFIX skos: http://www.w3.org/2004/02/skos/core# PREFIX rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# PREFIX rdfs: http://www.w3.org/2000/01/rdf-schema# PREFIX dc: http://purl.org/dc/elements/1.1/ PREFIX owl: http://www.w3.org/2002/07/owl# PREFIX xsd: http://www.w3.org/2001/XMLSchema# PREFIX vi: http://www.saltlux.com/transformer/views# PREFIX kto: http://data.visitkorea.or.kr/ontology/ PREFIX ktop: http://data.visitkorea.or.kr/property/ PREFIX ids: http://data.visitkorea.or.kr/resource/ PREFIX wgs: http://www.w3.org/2003/01/geo/wgs84_pos# PREFIX foaf: http://xmlns.com/foaf/0.1/ PREFIX geo: http://www.saltlux.com/geo/property# PREFIX pf: http://www.saltlux.com/DARQ/property# """
result1 = getresult1('제주') result2 = getresult1('서귀포')
len(result2['results']['bindings'])
result2
tourapi1=[] data = result1['results']['bindings']
for i in range(len(data)): temp=[] templabel = data[i]['label']['value'] x=float(data[i]['mapx']['value']) y=float(data[i]['mapy']['value']) temp.append(templabel) temp.append(data[i]['cat']['value']) temp.append(data[i]['subcat']['value']) temp.append(y) temp.append(x) temp.append(data[i]['address']['value']) temp.append(data[i]['descript']['value']) check = str(templabel) + " " + str(x) + " " + str(y) temp.append(check) tourapi1.append(temp)
tourapi2=[] data = result2['results']['bindings']
for i in range(len(data)): temp=[] templabel = data[i]['label']['value'] x=float(data[i]['mapx']['value']) y=float(data[i]['mapy']['value']) temp.append(templabel) temp.append(data[i]['cat']['value']) temp.append(data[i]['subcat']['value']) temp.append(y) temp.append(x) temp.append(data[i]['address']['value']) temp.append(data[i]['descript']['value']) check = str(templabel) + " " + str(x) + " " + str(y) temp.append(check) tourapi1.append(temp)
api_base = tourapi1 + tourapi2
len(api_base)
api_base
col=['name','cat1','cat3','mapy','mapx','address','description' , 'gps_key']
apibase=pd.DataFrame(api_base,columns=col)
apibase
apibase.to_excel('D:/ML/prepost/apibase_0910.xlsx', index=False)
pp_raw=pd.read_excel('D:/ML/prepost/에디트립여행정보데이터.xlsx')
pp_raw
pp_raw_list = pp_raw.values.tolist()
pp_raw_list
key_pp_raw = [] key1_pp = [] key2_pp = []
for i in range(len(pp_raw)): name = pp_raw['지점명'][i] name = name.replace(" ","") address = pp_raw['신주소'][i] y = float(pp_raw['좌표(lat)'][i]) x = float(pp_raw['좌표(lon)'][i]) if address.find("제주특별자치도 ") == -1: address2 = "제주특별자치도 " + address else: address2 = address key1 = name + " " + str(round(y,8)) + " " + str(round(x,8)) key2 = name + " " + address2 key1_pp.append(key1) key2_pp.append(key2) temp = [key1, key2] key_pp_raw.append(temp)
pp_raw['key_add'] = key2_pp pp_raw['key_gps'] = key1_pp
pp_raw
matched_pp = [] matched_pp_time = [] matched_pp_closed = [] matched_pp_park = [] matched_pp_descripion = [] m_check_pp = []
for gps, add in key_pp_raw: timetemp = "" closedtemp = "" parktemp = "" descriptiontemp = "" matching_check = "not matched"
m_check_pp
matched_api = [] m_check_api = []
for name, cat, address, mapy, mapx, time, closed, park, description, address_key, gps_key in api_add:
m_check_api
apibase['m_check']=m_check_api
pp_raw['m_check']=m_check_pp
apibase
api_m = apibase[apibase['m_check']=='not matched']
api_m
##########################
kc_493=pd.read_csv('D:/ML/prepost/KC_493_DMSTC_MCST_CAFE_DSRT_2020.csv') kc_495=pd.read_csv('D:/ML/prepost/KC_495_LLR_ATRCTN_2020.csv') kc_505=pd.read_csv('D:/ML/prepost/KC_505_DMSTC_MCST_RSTRT_2020.csv') kc_507=pd.read_csv('D:/ML/prepost/KC_507_LLR_HISTST_2020.csv')
kc_493.insert(0, "구분", "kc_493") kc_495.insert(0, "구분", "kc_495") kc_505.insert(0, "구분", "kc_505") kc_507.insert(0, "구분", "kc_507")
df = pd.concat([kc_493, kc_495, kc_505, kc_507])
df2 = df[df['sido_nm']=='제주특별자치도']
df2
name_map = {label:idx for idx, label in enumerate(np.unique(list(df2['id_poi'])))} len(name_map)
num=[]
for i in range(len(df2)): num.append(i)
지점명, 구주소, 신주소, 좌표(lat), 좌표(lon), 영업시간, 휴무일, 주차, 평균소요시간, 태그, 개요, key_add, key_gps
bc_data = df2
bc_data = bc_data.loc[:,['구분','poi_nm', 'sido_nm', 'sgg_nm','bemd_nm','ri_nm', 'beonji','rd_nm','bld_num','y','x','mcate_nm']]
bc_data
mcate_nm 태그로 넣기
bc_list = bc_data.values.tolist()
col2 = ['구분','poi_nm', 'sido_nm', 'sgg_nm','bemd_nm','ri_nm', 'beonji','rd_nm','bld_num','y','x','mcate_nm']
bc_data2 = pd.DataFrame(bc_list, columns = col2)
bc_data3 = bc_data2.fillna("")
bc_data4 = bc_data3.loc[:, ['poi_nm', 'y', 'x']]
key_gps = []
for i in range(len(bc_data4)): name = bc_data4['poi_nm'][i] y = round(bc_data4['y'][i],8) x = round(bc_data4['x'][i],8) key = name+ " " + str(y) + " " + str(x) key_gps.append(key)
bc_data3['old_add'] = bc_data3['sido_nm'].str.cat(bc_data3['sgg_nm'], sep=" ")
bc_data3['new_add'] = bc_data3['old_add'].str.cat(bc_data3['rd_nm'], sep=" ")
bc_data3['new_add'] = bc_data3['new_add'].str.cat(bc_data3['bld_num'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['bemd_nm'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['ri_nm'], sep=" ")
bc_data3['old_add'] = bc_data3['old_add'].str.cat(bc_data3['beonji'], sep=" ")
bc_data3['key_gps'] = key_gps
bc_data3
dup=bc_data3[bc_data3.duplicated(['key_gps'], keep='first')]
dup
dup2=bc_data3[bc_data3.duplicated(['key_gps'], keep='last')] dup2
bc_data3=bc_data3.drop_duplicates('key_gps')
bc_df = bc_data3.loc[:, ['구분','poi_nm','old_add','new_add','y','x','mcate_nm','key_gps']]
bc_df
bc_df.loc[bc_df.poi_nm== bc_df.mcate_nm, 'mcate_nm'] = np.nan
bc_df.insert(6, "영업시간", np.nan) bc_df.insert(7, "휴무일", np.nan) bc_df.insert(8, "주차가능여부", np.nan) bc_df.insert(9, "평균소요시간", np.nan) bc_df.insert(11, "개요", np.nan) bc_df.insert(12, "key_add", np.nan) bc_df.insert(14, "m_check", np.nan)
col_name = ['구분', '지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']
bc_df.columns = ['구분', '지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']
pp_bc = pd.concat([pp_raw, bc_df])
pp_bc
api_m.insert(0,"구분", "TourAPI") api_m.insert(3,"구주소", np.nan) api_m.insert(10,"소요시간", np.nan)
api_m.columns = ['구분', '지점명', '태그', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '개요', 'key_add', 'key_gps', 'm_check']
api_m=api_m[['구분','지점명', '구주소', '신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요', 'key_add', 'key_gps', 'm_check']] # 프리포스트 순서와 동일하게 컬럼 순서 변경
pp_bc_api = pd.concat([pp_bc, api_m])
listall = pp_bc_api.values.tolist()
df_all = pd.DataFrame(listall, columns = col_name)
df_final = df_all.loc[:, ['구분','지점명', '구주소','신주소', '좌표(lat)', '좌표(lon)', '영업시간', '휴무일', '주차가능여부(1-true, 0-false)', '평균소요시간(분)', '태그', '개요']]
df_final
park_map = {label:idx for idx, label in enumerate(np.unique(list(df_final['주차가능여부(1-true, 0-false)'])))} park_map
df_final.insert(9, "park_raw", np.nan)
df_final['park_raw']=df_final['주차가능여부(1-true, 0-false)']
df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "", '주차가능여부(1-true, 0-false)'] = np.nan df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가게앞 3대", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "공용주차장", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음 (약 소형 150대, 대형10대 주차가능)", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "있음(100대)", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "주차 가능", '주차가능여부(1-true, 0-false)'] = 1 df_final.loc[ df_final['주차가능여부(1-true, 0-false)'] == "주차가능", '주차가능여부(1-true, 0-false)'] = 1
park_map2 = {label:idx for idx, label in enumerate(np.unique(list(df_final['주차가능여부(1-true, 0-false)'])))} park_map2
df_final.to_excel('D:/ML/prepost/df_final.xlsx', index=False)
dup=pp_bc_api[pp_bc_api.duplicated(['key_gps'], keep='first')] dup2=pp_bc_api[pp_bc_api.duplicated(['key_gps'], keep='last')]
dup
dup2