Open parzival-au20 opened 6 months ago
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from convert_csv import save_to_csv, Content_Text_Control, text_to_date
def _defenceturk_wait_for_text(driver, css_selector, timeout=5):
    """Return the text of the first element matching ``css_selector``.

    Waits up to ``timeout`` seconds for the element to be present in the DOM.
    Any Selenium error (including the timeout itself) propagates to the caller.
    """
    element = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
    )
    return element.text


def fetch_DefenceTurk_news():
    """Scrape DefenceTurk news listing pages and hand the rows to ``save_to_csv``.

    For each listing page, up to ten article links are visited. For every
    article the title, body text, date and (optionally) the featured image URL
    are read; articles for which ``Content_Text_Control`` returns a truthy
    value are collected as
    ``[link, category, img_url, news_text, parsed_date, title, web_site_name]``.

    Returns:
        None. If the article links of a listing page cannot be read, the
        function prints an error and returns without saving anything.

    Raises:
        Errors raised while loading a listing page are not caught here and
        propagate to the caller; the browser is still shut down first.
    """
    category = "Heli"
    web_site_name = "DefenceTurk"
    maxPage = 9  # range() below excludes this value, so pages 1..8 are fetched
    news_array = []

    service = Service("./msedgedriver.exe")
    driver = webdriver.Edge(service=service)
    try:
        # The window size does not change between pages; maximize once.
        driver.maximize_window()

        for page_number in range(1, maxPage):
            driver.get(f"https://www.defenceturk.net/haberler/page/{page_number}/")
            time.sleep(3)
            try:
                news_list = driver.find_elements(By.CSS_SELECTOR, ".jeg_post_title > a")
            except WebDriverException:
                print(f"https://www.defenceturk.net/haberler/page/{page_number}/ ,ERROR haberin Linki bulunamadı")
                # Abort without saving, as before; ``finally`` closes the browser.
                return

            # Anchors without an href yield None, which cannot be navigated to.
            news_link = []
            for item in news_list:
                href = item.get_attribute("href")
                if href:
                    news_link.append(href)

            for link in news_link[:10]:
                try:
                    start_time = time.time()
                    driver.get(link)
                    elapsed_time = time.time() - start_time
                    print(f"işlem {elapsed_time} saniye sürdü")
                except WebDriverException:
                    print(link+"ERROR linke get isteği yapılamadı.")
                    # The browser still shows the previous page; scraping it
                    # would store another article's data under this link.
                    continue

                try:
                    title = _defenceturk_wait_for_text(driver, "h1.jeg_post_title")
                except WebDriverException:
                    print(link+" ERROR haberin title bulunamadı")
                    continue

                try:
                    news_text = _defenceturk_wait_for_text(driver, ".wprt-container")
                except WebDriverException:
                    print(link+" ERROR haberin texti bulunamadı")
                    continue

                try:
                    date = _defenceturk_wait_for_text(
                        driver, ".meta_left >.jeg_meta_date > a"
                    )
                except WebDriverException:
                    print(link+" ERROR haberin date bulunamadı")
                    continue
                # Keep only the first line of the date element's text.
                date = date.split("\n", 1)[0]

                try:
                    img_url = driver.find_element(
                        By.CSS_SELECTOR, ".jeg_featured.featured_image > a > div > img"
                    ).get_attribute("src")
                except WebDriverException:
                    # The featured image is optional.
                    img_url = None

                if Content_Text_Control(date, news_text, web_site_name):
                    news_array.append([
                        link,
                        category,
                        img_url,
                        news_text,
                        text_to_date(date, web_site_name),
                        title,
                        web_site_name,
                    ])
    finally:
        # Always release the browser and driver process, including on the
        # early return above and when an exception propagates.
        driver.quit()

    save_to_csv(news_array, web_site_name)
Hata oluştu: Message: timeout: Timed out receiving message from renderer: 0.005 (Session info: edge-headless-shell=123.0.2420.65) Stacktrace: GetHandleVerifier [0x00007FF6E76F5B82+59842] Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6E7684AB2+220434]
Selenium -edge.zip