AC-corporation / server

수강신청 도움 서비스 AllClear Server Repository
0 stars 2 forks source link

[Feat] U-SAINT 스크랩핑 #1

Closed daeun084 closed 11 months ago

daeun084 commented 11 months ago

💡 Description


🚨 Issue

chanmin-00 commented 11 months ago
from selenium import webdriver
from selenium.webdriver.common.by import By

import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
#!pip install selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# U-SAINT portal home page and the SSO login page that redirects back to it.
url = "https://saint.ssu.ac.kr/irj/portal"
login_url = "https://smartid.ssu.ac.kr/Symtra_sso/smln.asp?apiReturnUrl=https%3A%2F%2Fsaint.ssu.ac.kr%2FwebSSO%2Fsso.jsp"

# Student ID used when USAINT_ID is not set (the value the script shipped with).
DEFAULT_USERNAME = "20203058"

# Login link inside the SSO form.
LOGIN_BUTTON_XPATH = '//*[@id="sLogin"]/div/div[1]/form/div/div[2]/a'


def load_credentials(environ=None):
    """Return ``(username, password)`` for the U-SAINT login.

    Args:
        environ: Mapping to read ``USAINT_ID`` and ``USAINT_PASSWORD`` from.
            Defaults to ``os.environ``.

    Returns:
        A ``(username, password)`` tuple. The username falls back to
        ``DEFAULT_USERNAME``; a missing or empty password is prompted for
        with ``getpass`` so it never has to be written into the source.
    """
    import getpass
    import os

    env = os.environ if environ is None else environ
    username = env.get("USAINT_ID") or DEFAULT_USERNAME
    password = env.get("USAINT_PASSWORD") or getpass.getpass("U-SAINT password: ")
    return username, password


def wait_for(driver, by, value, timeout=10):
    """Poll until an element matching ``(by, value)`` exists and return it.

    Raises:
        selenium.common.exceptions.TimeoutException: if nothing matches
            within ``timeout`` seconds.
    """
    # WebDriverWait ignores NoSuchElementException by default, so the lambda
    # is simply retried until the lookup succeeds.
    return WebDriverWait(driver, timeout).until(lambda d: d.find_element(by, value))


def main():
    """Log in to U-SAINT and print the ``span.top_user`` element of the page."""
    from selenium.common.exceptions import TimeoutException

    username, password = load_credentials()
    driver = webdriver.Chrome()
    try:
        driver.get(login_url)

        # Use find_element with By.NAME (find_element_by_name no longer exists).
        wait_for(driver, By.NAME, 'userid').send_keys(username)
        wait_for(driver, By.NAME, 'pwd').send_keys(password)
        wait_for(driver, By.XPATH, LOGIN_BUTTON_XPATH).click()

        # Actually wait for the post-login page: merely constructing a
        # WebDriverWait does not block, so the page source used to be read
        # while the redirect was still in flight.
        try:
            wait_for(driver, By.CSS_SELECTOR, 'span.top_user')
        except TimeoutException:
            # Fall through on purpose: the lookup below then prints None,
            # which is what the script reported before when the element
            # was missing.
            pass

        # The HTML can be read now.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        member_name = soup.find("span", {"class": "top_user"})
        print(member_name)
    finally:
        # Always close the browser and the chromedriver process.
        driver.quit()


if __name__ == "__main__":
    main()
chanmin-00 commented 11 months ago
from selenium import webdriver
from selenium.webdriver.common.by import By

import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
#!pip install selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# U-SAINT portal home page and the SSO login page that redirects back to it.
url = "https://saint.ssu.ac.kr/irj/portal"
login_url = "https://smartid.ssu.ac.kr/Symtra_sso/smln.asp?apiReturnUrl=https%3A%2F%2Fsaint.ssu.ac.kr%2FwebSSO%2Fsso.jsp"

# Student ID used when USAINT_ID is not set (the value the script shipped with).
DEFAULT_USERNAME = "20203058"

# Login link inside the SSO form, and the portal menu link clicked after login.
LOGIN_BUTTON_XPATH = '//*[@id="sLogin"]/div/div[1]/form/div/div[2]/a'
MENU_BUTTON_XPATH = '//*[@id="ddba4fb5fbc996006194d3c0c0aea5c4"]/a'

# Text fragment the script looks for in the loaded page.
SEARCH_TEXT = "소프트"


def load_credentials(environ=None):
    """Return ``(username, password)`` for the U-SAINT login.

    Args:
        environ: Mapping to read ``USAINT_ID`` and ``USAINT_PASSWORD`` from.
            Defaults to ``os.environ``.

    Returns:
        A ``(username, password)`` tuple. The username falls back to
        ``DEFAULT_USERNAME``; a missing or empty password is prompted for
        with ``getpass`` so it never has to be written into the source.
    """
    import getpass
    import os

    env = os.environ if environ is None else environ
    username = env.get("USAINT_ID") or DEFAULT_USERNAME
    password = env.get("USAINT_PASSWORD") or getpass.getpass("U-SAINT password: ")
    return username, password


def wait_for(driver, by, value, timeout=30):
    """Poll until an element matching ``(by, value)`` exists and return it.

    Raises:
        selenium.common.exceptions.TimeoutException: if nothing matches
            within ``timeout`` seconds.
    """
    # WebDriverWait ignores NoSuchElementException by default, so the lambda
    # is simply retried until the lookup succeeds.
    return WebDriverWait(driver, timeout).until(lambda d: d.find_element(by, value))


def main():
    """Log in, open the portal menu entry and dump the resulting page."""
    from selenium.common.exceptions import TimeoutException

    username, password = load_credentials()
    driver = webdriver.Chrome()
    try:
        driver.get(login_url)

        # Use find_element with By.NAME (find_element_by_name no longer exists).
        wait_for(driver, By.NAME, 'userid').send_keys(username)
        wait_for(driver, By.NAME, 'pwd').send_keys(password)
        wait_for(driver, By.XPATH, LOGIN_BUTTON_XPATH).click()

        # Navigation is possible once logged in.
        wait_for(driver, By.XPATH, MENU_BUTTON_XPATH).click()

        # Wait for the text we are after instead of only constructing a
        # WebDriverWait, which by itself never blocks.
        try:
            WebDriverWait(driver, 30).until(lambda d: SEARCH_TEXT in d.page_source)
        except TimeoutException:
            # Fall through on purpose so the page is still dumped for
            # inspection, as the script did before.
            pass

        # The HTML can be read now.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        # Presumably the goal is the text node containing SEARCH_TEXT; a
        # lookup by *tag name* with that value can never match anything.
        member_name = soup.find(string=lambda text: text is not None and SEARCH_TEXT in text)
        print(member_name)
        print(soup)
    finally:
        # Always close the browser and the chromedriver process.
        driver.quit()


if __name__ == "__main__":
    main()
Namjeongsu commented 11 months ago

μ°¬λ―Όμ΄λž‘ 같은 λ°©μ‹μœΌλ‘œ 계속 μ‹œλ„λ₯Ό 해보고 μžˆμ—ˆλŠ”λ° 이런 λ°©μ‹μœΌλ‘œ μž‘μ„±ν•˜λ‹ˆκΉŒ μ„Έμ…˜μ΄ λ§Œλ£Œλ˜μ„œ κ·ΈλŸ°κ°€? μžλ™ λ‘œκ·Έμ•„μ›ƒ λ˜μ–΄λ²„λ €μ„œ μ›ν•˜λŠ” νŽ˜μ΄μ§€ μ½”λ“œλ₯Ό 얻을 μˆ˜κ°€ μ—†μŒ

chanmin-00 commented 11 months ago

ν•™μ μ •λ³΄λž€μ—μ„œ μ›ν•˜λŠ” κ°œμΈμ •λ³΄ 크둀링

#!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

import requests
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
from selenium.webdriver.support.ui import WebDriverWait

# μœ μ„ΈμΈνŠΈ ν™ˆνŽ˜μ΄μ§€ μ£Όμ†Œμ™€ 둜그인 νŽ˜μ΄μ§€ μ£Όμ†Œ
url = "https://saint.ssu.ac.kr/irj/portal"
login_url = "https://smartid.ssu.ac.kr/Symtra_sso/smln.asp?apiReturnUrl=https%3A%2F%2Fsaint.ssu.ac.kr%2FwebSSO%2Fsso.jsp"

# μž…λ ₯ 받을 아이디와 λΉ„λ°€λ²ˆν˜Έ
username = "20203058"
password = "Jjw112233#"

# requestsλ₯Ό 톡해 μ„Έμ…˜ μ‹œμž‘
# session = requests.Session()

driver = webdriver.Chrome()
driver.implicitly_wait(3)
driver.get(login_url)

# 'find_element_by_name' λŒ€μ‹  'find_element'와 By.NAME을 μ‚¬μš©ν•©λ‹ˆλ‹€.
# νƒœκ·Έ μš”μ†Œλ₯Ό μ°Ύμ•„μ„œ κ·Έ νƒœκ·Έκ°€ 감싸고 μžˆλŠ” 아이디와 λΉ„λ²ˆ 데이터λ₯Ό 보낸닀
username_element = driver.find_element(By.NAME, 'userid')
password_element = driver.find_element(By.NAME, 'pwd')
username_element.send_keys(username)
password_element.send_keys(password)
login_button = driver.find_element(By.XPATH, '//*[@id="sLogin"]/div/div[1]/form/div/div[2]/a')
login_button.click()
# λ‘œκ·ΈμΈν•œ μƒνƒœμ—μ„œ ν™”λ©΄ 이동 κ°€λŠ₯
button = driver.find_element(By.XPATH, '//*[@id="ddba4fb5fbc996006194d3c0c0aea5c4"]/a')
button.click()
wait = WebDriverWait(driver, 30)

num_element = driver.find_element(By.CSS_SELECTOR, 'body > script:nth-child(13)')
number = num_element.get_attribute('innerHTML')
num_index = number.find(username);
print(number[num_index:num_index+8])

iframe_element = driver.find_element(By.NAME, 'contentAreaFrame')
driver.switch_to.frame(iframe_element)
iframe2_element = driver.find_element(By.XPATH, '//*[@id="isolatedWorkArea"]')
driver.switch_to.frame(iframe2_element)

wait = WebDriverWait(driver, 30)
class_element = driver.find_element(By.CSS_SELECTOR, '#WDFD')
print(class_element.get_attribute('value'))
driver.switch_to.default_content() 
chanmin-00 commented 11 months ago

졸업사정표 크롤링

#!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

import time
import requests
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
from selenium.webdriver.support.ui import WebDriverWait

# μœ μ„ΈμΈνŠΈ ν™ˆνŽ˜μ΄μ§€ μ£Όμ†Œμ™€ 둜그인 νŽ˜μ΄μ§€ μ£Όμ†Œ
url = "https://saint.ssu.ac.kr/irj/portal"
login_url = "https://smartid.ssu.ac.kr/Symtra_sso/smln.asp?apiReturnUrl=https%3A%2F%2Fsaint.ssu.ac.kr%2FwebSSO%2Fsso.jsp"

# μž…λ ₯ 받을 아이디와 λΉ„λ°€λ²ˆν˜Έ
username = "20192396"
password = "wjdtntkdtjs13@26"

# requestsλ₯Ό 톡해 μ„Έμ…˜ μ‹œμž‘
# session = requests.Session()

driver = webdriver.Chrome()
driver.implicitly_wait(3)
driver.get(login_url)

# 'find_element_by_name' λŒ€μ‹  'find_element'와 By.NAME을 μ‚¬μš©ν•©λ‹ˆλ‹€.
# νƒœκ·Έ μš”μ†Œλ₯Ό μ°Ύμ•„μ„œ κ·Έ νƒœκ·Έκ°€ 감싸고 μžˆλŠ” 아이디와 λΉ„λ²ˆ 데이터λ₯Ό 보낸닀
username_element = driver.find_element(By.NAME, 'userid')
password_element = driver.find_element(By.NAME, 'pwd')
username_element.send_keys(username)
password_element.send_keys(password)
login_button = driver.find_element(By.XPATH, '//*[@id="sLogin"]/div/div[1]/form/div/div[2]/a')
login_button.click()
# λ‘œκ·ΈμΈν•œ μƒνƒœμ—μ„œ ν™”λ©΄ 이동 κ°€λŠ₯
button = driver.find_element(By.XPATH, '//*[@id="ddba4fb5fbc996006194d3c0c0aea5c4"]/a')
button.click()
wait = WebDriverWait(driver, 10)

num_element = driver.find_element(By.CSS_SELECTOR, 'body > script:nth-child(13)')
number = num_element.get_attribute('innerHTML')
num_index = number.find(username)
print(number[num_index:num_index+8])

iframe_element = driver.find_element(By.NAME, 'contentAreaFrame')
driver.switch_to.frame(iframe_element)
iframe2_element = driver.find_element(By.XPATH, '//*[@id="isolatedWorkArea"]')
driver.switch_to.frame(iframe2_element)

wait = WebDriverWait(driver, 10)
class_element = driver.find_element(By.CSS_SELECTOR, '#WDFD')
print(class_element.get_attribute('value'))
driver.switch_to.default_content() 

grade_element = driver.find_element(By.XPATH, '//*[@id="8d3da4feb86b681d72f267880ae8cef5"]')
grade_element.click()
wait = WebDriverWait(driver, 10)
close_element = driver.find_element(By.XPATH, '//*[@id="30f2303171c98bdf57db799d0b834646"]/a')
close_element.click()

iframe_element = driver.find_element(By.NAME, 'contentAreaFrame')
driver.switch_to.frame(iframe_element)
iframe2_element = driver.find_element(By.XPATH, '//*[@id="isolatedWorkArea"]')
driver.switch_to.frame(iframe2_element)
wait = WebDriverWait(driver, 30)

# ν…Œμ΄λΈ”μ˜ display-none λͺ©λ‘μ„ λͺ¨λ‘ block으둜 λ°”κΎΈμ–΄μ€Œ
driver.execute_script('''
    var tables = document.querySelectorAll('table');
    tables.forEach(function(table) {
        table.style.display = 'block';
    });
''')

tables = driver.find_elements(By.XPATH, '/html/body/table')
# 각 ν…Œμ΄λΈ”μ˜ λ‚΄μš© 좜λ ₯
time.sleep(3)
for table in tables:
    # ν…Œμ΄λΈ”μ˜ λͺ¨λ“  ν–‰(row)을 μ°ΎκΈ°
    rows = table.find_elements(By.TAG_NAME, 'tr')
    # 각 ν–‰μ˜ μ…€(cell) 좜λ ₯
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        for cell in cells:
            print(cell.text)

print("good")