import re, pprint
# Korean calendar date as it appears in the export, e.g. "2023년 10월 6일".
# Month and day are limited to 1-2 digits so a match cannot stretch across
# unrelated message text that merely contains "월" and "일" later in the line.
date_regex = r"\d{4}년 \d{1,2}월 \d{1,2}일"
# URL with an optional scheme and optional "www." prefix, a dotted host, and
# an optional trailing path/query part.
url_regex = r"(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/\/=]*)"

# Compile both patterns once up front instead of once per input line.
date_pattern = re.compile(date_regex)
url_pattern = re.compile(url_regex)

# Most recent date seen while scanning. It stays "" until the first date is
# found, so links that appear before any date are grouped under the "" key.
cur_date = ""
# Maps a date string to the list of links found from that date onward,
# in file order.
month_week_and_link_urls = {}

with open("KakaoTalk_20231006_1604_34_156_group.txt", "r", encoding='UTF8') as f:
    for line in f:
        # A line containing a date switches the bucket for all following links.
        date_match = date_pattern.search(line)
        if date_match is not None:
            cur_date = date_match.group()

        # search() returns only the first URL on the line; any further URLs
        # on the same line are not recorded.
        link_match = url_pattern.search(line)
        if link_match is not None:
            cur_link = link_match.group()
            month_week_and_link_urls.setdefault(cur_date, []).append(cur_link)

pprint.pprint(month_week_and_link_urls)
# TODO
# 민정님: Convert the Python dict to Markdown format (the Markdown format still needs to be decided).
# 동혁님: Show each link's thumbnail (detail: parse the thumbnail and title from the HTML meta tags and render them as an image; see ref. link).
# 일표님: Upload to GitHub as a PR.
# 인제님: Group links by week (1st week, 2nd week, ...).
# Idea
# Issues