parzival-au20 / sefa

0 stars 0 forks source link

Selenium Edge #49

Open parzival-au20 opened 6 months ago

parzival-au20 commented 6 months ago

Selenium -edge.zip

parzival-au20 commented 6 months ago

import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from convert_csv import save_to_csv, Content_Text_Control, text_to_date

def _collect_article_links(driver, page_url):
    """Load a listing page and return the article URLs found on it.

    Args:
        driver: The Selenium WebDriver used for navigation.
        page_url: URL of the listing page to open.

    Returns:
        A list of non-empty ``href`` values taken from the
        ``.jeg_post_title > a`` anchors, in page order.

    Raises:
        WebDriverException: If navigation or the element lookup fails.
    """
    driver.get(page_url)
    # Fixed pause kept from the original script to let the listing render.
    time.sleep(3)
    anchors = driver.find_elements(By.CSS_SELECTOR, ".jeg_post_title > a")
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    # Anchors without an href cannot be visited, so they are dropped here.
    return [href for href in hrefs if href]


def _scrape_article(driver, link):
    """Open one article and extract its title, body text, date and image URL.

    Args:
        driver: The Selenium WebDriver used for navigation.
        link: URL of the article page.

    Returns:
        A ``(title, news_text, date, img_url)`` tuple, or ``None`` when the
        page could not be loaded or a required field (title, text, date) was
        not found. ``img_url`` is ``None`` when no featured image is present.
    """
    try:
        started = time.perf_counter()
        driver.get(link)
        elapsed_time = time.perf_counter() - started
        print(f"işlem {elapsed_time} saniye sürdü")
    except WebDriverException:
        print(link+"ERROR linke get isteği yapılamadı.")
        # The browser is still showing the previous page; scraping it would
        # store that page's content under this link, so give up on this one.
        return None

    # (CSS selector, message suffix printed when the element is missing)
    required_fields = (
        ("h1.jeg_post_title", " ERROR haberin title bulunamadı"),
        (".wprt-container", " ERROR haberin texti bulunamadı"),
        (".meta_left >.jeg_meta_date > a", " ERROR haberin date bulunamadı"),
    )
    texts = []
    for css_selector, error_suffix in required_fields:
        try:
            element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
            )
            texts.append(element.text)
        except WebDriverException:
            print(link + error_suffix)
            return None
    title, news_text, date = texts
    # Only the first line of the date element's text is kept.
    date = date.split("\n", 1)[0]

    # The featured image is optional: a missing image does not reject the article.
    try:
        img_url = driver.find_element(
            By.CSS_SELECTOR, ".jeg_featured.featured_image > a > div > img"
        ).get_attribute("src")
    except WebDriverException:
        img_url = None

    return title, news_text, date, img_url


def fetch_DefenceTurk_news():
    """Scrape DefenceTurk news listing pages and save the articles to CSV.

    Walks the listing pages, visits up to the first 10 article links of each
    page, and collects every article accepted by ``Content_Text_Control``
    into rows of ``[link, category, img_url, news_text, date, title,
    web_site_name]``. The rows are passed to ``save_to_csv`` at the end.

    Articles that fail to load or lack a title, text or date are skipped.
    If a listing page cannot be read, paging stops but the rows collected so
    far are still saved. The browser is always closed, even on error.

    Returns:
        None.
    """
    category = "Heli"
    web_site_name = "DefenceTurk"

    # NOTE(review): range(1, maxPage) visits pages 1..8 only - confirm whether
    # page 9 was meant to be included.
    maxPage = 9
    news_array = []

    service = Service("./msedgedriver.exe")
    driver = webdriver.Edge(service=service)
    try:
        driver.maximize_window()
        for page_number in range(1, maxPage):
            page_url = f"https://www.defenceturk.net/haberler/page/{page_number}/"
            try:
                news_link = _collect_article_links(driver, page_url)
            except WebDriverException:
                print(f"{page_url} ,ERROR haberin Linki bulunamadı")
                # Stop paging, but fall through so collected rows are saved.
                break

            for link in news_link[:10]:
                article = _scrape_article(driver, link)
                if article is None:
                    continue
                title, news_text, date, img_url = article

                # Content_Text_Control and text_to_date come from convert_csv;
                # their exact rules are not visible in this file.
                if Content_Text_Control(date, news_text, web_site_name):
                    news_array.append([
                        link,
                        category,
                        img_url,
                        news_text,
                        text_to_date(date, web_site_name),
                        title,
                        web_site_name,
                    ])
    finally:
        # Release the browser and msedgedriver process on every exit path.
        driver.quit()

    save_to_csv(news_array, web_site_name)
parzival-au20 commented 6 months ago

Hata oluştu: Message: timeout: Timed out receiving message from renderer: 0.005 (Session info: edge-headless-shell=123.0.2420.65) Stacktrace: GetHandleVerifier [0x00007FF6E76F5B82+59842] Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6E7684AB2+220434]

parzival-au20 commented 6 months ago

Selenium -edge.zip

parzival-au20 commented 6 months ago

BeautyTaker.zip

parzival-au20 commented 6 months ago

Medya_Takip_Yeni_Siteler_V4.xlsx