Open chankoo opened 6 years ago
소현이걸봐2
# Number of listings written to each output file.  The resume logic below
# multiplies the number of existing files by this value, so every periodic
# file must cover exactly this many listings.
_BATCH_SIZE = 100
# Attempts made for one listing before it is reported as not stored.
_MAX_TRIES = 2


def _dump_batch(batch, first_idx, last_idx):
    """Write one batch to ``review_<first_idx>_to_<last_idx>_seoul.json``.

    The file is created in the current working directory.  Both indices are
    inclusive positions in ``listing_ids``.

    Args:
        batch: JSON-serialisable dict holding the reviews gathered so far.
        first_idx: Index of the first listing covered by this file.
        last_idx: Index of the last listing covered by this file.
    """
    path = os.path.join(
        os.getcwd(),
        'review_' + str(first_idx) + '_to_' + str(last_idx) + '_seoul.json')
    try:
        # Explicit UTF-8 so non-ASCII review text does not depend on the
        # platform's default codec.
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(batch, fp, ensure_ascii=False)
    except UnicodeEncodeError as e:
        print(last_idx, e)
        # Re-open in 'w' mode so the partial output of the failed dump is
        # discarded instead of being followed by a second JSON document.
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(batch, fp, ensure_ascii=True)


if __name__ == '__main__':
    import traceback  # only needed by this entry point, for failure reports

    # Work out where to resume: each existing output file is taken to cover
    # _BATCH_SIZE listings.
    try_idx = 0
    print(os.getcwd())  # confirm this is './aibnb-data'
    flist = glob.glob('*_seoul.json')  # only files ending in _seoul.json
    if flist:
        try_idx = len(flist) * _BATCH_SIZE
    now_try_idx = try_idx  # index of the listing about to be processed

    # Main loop
    listing_ids_loop = listing_ids[try_idx:]
    for listing_id in tqdm(listing_ids_loop):
        for tries_left in range(_MAX_TRIES - 1, -1, -1):
            try:
                review_count = get_review_count(listing_id)
                # NOTE(review): presumably get_reviews() stores its result in
                # the module-level review_jsn under listing_id -- confirm.
                get_reviews(listing_id, review_count)
                break
            except Exception as e:
                except_cnt += 1
                print(e.args, traceback.format_exc())
                # Drop whatever a failed attempt may have stored.
                review_jsn.pop(listing_id, None)
                if tries_left == 0:
                    print("lising_id {} not stored".format(listing_id))

        # Advance exactly once per listing (not once per attempt) so the
        # counter stays aligned with positions in listing_ids.
        now_try_idx += 1

        # Flush after every _BATCH_SIZE listings.  The file is written even
        # when the batch is empty so that the file count used for resuming
        # stays correct.
        if now_try_idx % _BATCH_SIZE == 0:
            _dump_batch(review_jsn, try_idx, now_try_idx - 1)
            review_jsn = {}  # reset for the next batch
            try_idx = now_try_idx

    # Final flush of whatever is left over; skipped when nothing is pending.
    if review_jsn:
        _dump_batch(review_jsn, try_idx, now_try_idx - 1)
        review_jsn = {}  # reset
import traceback  # used only for failure reports in the retry loop below

# Number of listings written to each output file.  The resume logic below
# multiplies the number of existing files by this value, so every periodic
# file must cover exactly this many listings.
_BATCH_SIZE = 100
# Attempts made for one listing before it is reported as not stored.
_MAX_TRIES = 2


def _dump_batch(batch, first_idx, last_idx):
    """Write one batch to ``review_<first_idx>_to_<last_idx>_seoul.json``.

    The file is created in the current working directory.  Both indices are
    inclusive positions in ``listing_ids``.

    Args:
        batch: JSON-serialisable dict holding the reviews gathered so far.
        first_idx: Index of the first listing covered by this file.
        last_idx: Index of the last listing covered by this file.
    """
    path = os.path.join(
        os.getcwd(),
        'review_' + str(first_idx) + '_to_' + str(last_idx) + '_seoul.json')
    try:
        # Explicit UTF-8 so non-ASCII review text does not depend on the
        # platform's default codec.
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(batch, fp, ensure_ascii=False)
    except UnicodeEncodeError as e:
        print(last_idx, e)
        # Re-open in 'w' mode so the partial output of the failed dump is
        # discarded instead of being followed by a second JSON document.
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(batch, fp, ensure_ascii=True)


# Find try_idx: each existing output file is taken to cover _BATCH_SIZE
# listings, so the scrape resumes right after them.
try_idx = 0
print(os.getcwd())  # confirm this is './aibnb-data'
flist = glob.glob('*_seoul.json')  # only files ending in _seoul.json
if flist:
    try_idx = len(flist) * _BATCH_SIZE
now_try_idx = try_idx  # index of the listing about to be processed

# Main loop
listing_ids_loop = listing_ids[try_idx:]
for listing_id in tqdm(listing_ids_loop):
    for tries_left in range(_MAX_TRIES - 1, -1, -1):
        try:
            review_count = get_review_count(listing_id)
            # NOTE(review): presumably get_reviews() stores its result in
            # the module-level review_jsn under listing_id -- confirm.
            get_reviews(listing_id, review_count)
            break
        except Exception as e:
            except_cnt += 1
            print(e.args, traceback.format_exc())
            # Drop whatever a failed attempt may have stored.
            review_jsn.pop(listing_id, None)
            if tries_left == 0:
                print("lising_id {} not stored".format(listing_id))

    # Advance exactly once per listing (not once per attempt) so the counter
    # stays aligned with positions in listing_ids.
    now_try_idx += 1

    # Flush after every _BATCH_SIZE listings.  The file is written even when
    # the batch is empty so that the file count used for resuming stays
    # correct.
    if now_try_idx % _BATCH_SIZE == 0:
        _dump_batch(review_jsn, try_idx, now_try_idx - 1)
        review_jsn = {}  # reset for the next batch
        try_idx = now_try_idx

# Final flush of whatever is left over; skipped when nothing is pending.
if review_jsn:
    _dump_batch(review_jsn, try_idx, now_try_idx - 1)
    review_jsn = {}  # reset
https://subicura.com/2017/01/19/docker-guide-for-beginners-2.html