Open jellimin opened 1 year ago
예능 출연진도 추출 완료하고, 디스코드 알림과 연결해 두었습니다!
## 시청률 관련 크롤링 - 예능
# 필요한 모듈 불러오기
import time
import os
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
from tqdm import tqdm_notebook
from selenium.common.exceptions import NoSuchElementException
import datetime
from datetime import datetime
from datetime import date
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions
import warnings
warnings.filterwarnings('ignore')
# Chrome options shared by every browser session started below.
# The 'enable-logging' switch is excluded to get rid of the error output
# that otherwise shows up while crawling.
_excluded_switches = ['enable-logging']
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', _excluded_switches)
def send_message(message, timeout=10):
    """Post a notification to the Discord webhook.

    Args:
        message: Mapping passed to ``requests.post`` as ``data=`` (sent as
            form fields), e.g. ``{'content': 'some text'}``.
        timeout: Seconds to wait for the webhook before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            script on a stalled connection.

    Returns:
        The ``requests.Response`` returned by the POST, so callers can
        inspect the status if they want to. Existing callers that ignore
        the return value are unaffected.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # NOTE(review): the webhook URL embeds a secret token and is committed in
    # source; consider rotating it and loading it from configuration instead.
    return requests.post(
        "https://discord.com/api/webhooks/1140278554609844264/aUubW_3WgjV_hwzVcQPjeWrhQzm1lZBS481VVIHqO7Cq_4A7E0xcJ2FPKsxRtaWy6R1r",
        data=message,
        timeout=timeout,
    )
# Crawl the cast of entertainment shows from Naver search results.
ent_title = ['나 혼자 산다', '유 퀴즈 온 더 블럭', '놀라운 토요일']  # show titles to search for
title = []   # show title, one entry per crawled episode
actor = []   # comma-joined cast names, one entry per crawled episode
series = []  # episode label text
# NOTE(review): this name shadows `date` imported from `datetime` at the top
# of the file; it is kept because other parts of the file may rely on it.
date = []    # air-date text

# Number of episode rows read per show and the maximum cast links read per
# episode.
_EPISODES_PER_SHOW = 2
_MAX_CAST_PER_EPISODE = 5

# Absolute XPaths into the Naver search result page. They are tied to the
# page layout and will need updating if the layout changes.
_SEARCH_URL = "https://search.naver.com/search.naver?query={}"
_EPISODE_TAB_XPATH = "/html/body/div[3]/div[2]/div/div[1]/div[2]/div[1]/div[4]/div/div/ul/li[5]/a"
_EPISODE_ROW_XPATH = "/html/body/div[3]/div[2]/div/div[1]/div[2]/div[2]/div/div/div/div[{}]/ul/li[1]/div"
_EPISODE_LABEL_XPATH = _EPISODE_ROW_XPATH + "/div[1]/strong/a/span"
_EPISODE_DATE_XPATH = _EPISODE_ROW_XPATH + "/div[1]/span"
_EPISODE_CAST_XPATH = _EPISODE_ROW_XPATH + "/dl/dd/a[{}]"


def _collect_cast(driver, episode_index, max_cast=_MAX_CAST_PER_EPISODE):
    """Return the cast names of one episode row joined with commas.

    Reads cast links 1..max_cast in order and stops at the first one that is
    missing, so episodes with fewer cast members still yield a string (an
    empty string when no cast link exists at all).
    """
    names = []
    for cast_index in range(1, max_cast + 1):
        xpath = _EPISODE_CAST_XPATH.format(episode_index, cast_index)
        try:
            names.append(driver.find_element(By.XPATH, xpath).text)
        except NoSuchElementException:
            # No more cast links for this episode.
            break
    return ",".join(names)


for show_title in ent_title:
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(_SEARCH_URL.format(show_title))
        # No wait is configured: the episode-info tab is clicked as soon as
        # get() returns.
        driver.find_element(By.XPATH, _EPISODE_TAB_XPATH).click()  # switch to the episode-info tab
        for episode_index in range(1, _EPISODES_PER_SHOW + 1):
            episode_label = driver.find_element(
                By.XPATH, _EPISODE_LABEL_XPATH.format(episode_index)
            ).text
            air_date = driver.find_element(
                By.XPATH, _EPISODE_DATE_XPATH.format(episode_index)
            ).text
            cast = _collect_cast(driver, episode_index)
            # Append all columns together, only after every lookup for this
            # episode has succeeded, so the lists always stay the same length.
            title.append(show_title)
            series.append(episode_label)
            date.append(air_date)
            actor.append(cast)
    finally:
        # Always close the browser; one is started per show.
        driver.quit()
# Assemble the crawled columns into a table and write it to CSV.
# The mapping is not named `dict`, so the builtin stays usable afterwards.
columns = {'제목':title, '회차정보':series, '방영날짜':date, '출연진':actor}
df = pd.DataFrame(columns)
df.to_csv("entertainment.csv", index=False, sep=",")
# Send the Discord notification only after the CSV has been written.
message = {'content':'예능 출연진 크롤링을 완료했습니다.'}
send_message(message)
드라마 줄거리 추출 완료했고, 디스코드에 알림도 보냈습니다!