Open jcyrss opened 4 years ago
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
class QCWY:
    """Scrape job listings from www.51job.com into a CSV file.

    The scraper drives a Chrome browser through Selenium: it searches for
    ``keyword`` in ``city``, walks through at most ``maxpagenum`` result
    pages, opens the detail page of every job and writes one CSV row per job.
    """

    # Column order of the generated CSV file.
    FIELD_NAMES = ['职位名称',
                   '详细链接',
                   '公司名称',
                   '工作地点',
                   '薪资',
                   '发布时间',
                   '职位信息']

    # Implicit wait (seconds) used for normal element look-ups.
    IMPLICIT_WAIT = 10

    def __init__(self, keyword, city, maxpagenum):
        """Store the search parameters.

        Args:
            keyword: Search keyword typed into the job search box.
            city: City name to pick from the hot-city list.
            maxpagenum: Maximum number of result pages to scrape.
        """
        self.keyword = keyword
        self.city = city
        self.maxpagenum = maxpagenum

    def run(self):
        """Run the whole scrape and write the CSV file.

        The browser is always closed when the method exits, even if one of
        the Selenium calls raises.
        """
        driver = webdriver.Chrome()
        try:
            driver.implicitly_wait(self.IMPLICIT_WAIT)
            driver.get('http://www.51job.com')

            # Type the keyword into the search box.
            driver.find_element(By.ID, 'kwdselectid').send_keys(self.keyword)

            self._select_city(driver)

            # Start the search.
            driver.find_element(By.CSS_SELECTOR, 'div.ush > button').click()

            # errors='replace' keeps the GBK output format but stops a single
            # character that GBK cannot encode from aborting the scrape.
            with open(f'前途无忧招聘_关键词_{self.keyword}_城市_{self.city}.csv',
                      'w', newline='', encoding='gbk',
                      errors='replace') as f:
                f_csv = csv.DictWriter(f, self.FIELD_NAMES)
                f_csv.writeheader()

                for pageNo in range(1, self.maxpagenum + 1):
                    # Jump to the requested page number.
                    pageNoInput = driver.find_element(By.ID, 'jump_page')
                    pageNoInput.clear()
                    pageNoInput.send_keys(str(pageNo))
                    driver.find_element(By.CSS_SELECTOR,
                                        'span.og_but').click()

                    # Pause one second so the page can settle.
                    time.sleep(1)

                    rows = self.handleOnePage(driver)
                    f_csv.writerows(rows)

                    # Stop once the last result page has been processed.
                    if self.isLastPage(driver):
                        break
        finally:
            driver.quit()

    def _select_city(self, driver):
        """Replace the pre-selected cities in the city dialog by ``self.city``."""
        # Open the city selection dialog.
        driver.find_element(By.ID, 'work_position_input').click()

        # Wait one second so the dialog is stable.
        time.sleep(1)

        # Click every currently selected city to deselect it.
        selectedCityEles = driver.find_elements(
            By.CSS_SELECTOR, '#work_position_click_multiple_selected > span')
        for one in selectedCityEles:
            one.click()

        # Look for the wanted city in the hot-city list.
        cityEles = driver.find_elements(
            By.CSS_SELECTOR, '#work_position_click_center_right_list_000000 em')
        target = next(
            (cityEle for cityEle in cityEles if cityEle.text == self.city),
            None)

        if target is None:
            # City is not in the list: let the user pick it by hand.
            input(f'{self.city} 不在热门城市列表中,请手动点击选中城市后,按回车继续...')
        else:
            target.click()

        # Save the city selection.
        driver.find_element(By.ID, 'work_position_click_bottom_save').click()

    # Is the current result page the last one?
    def isLastPage(self, driver):
        """Return True when the current result page is the last one.

        The last item of the pager contains a link only while a next page
        exists, so the absence of an ``<a>`` element means "last page".
        """
        NextPageButton = driver.find_element(
            By.CSS_SELECTOR, 'div.dw_page li:last-child')

        # Shorten the implicit wait so that a missing link does not block
        # for the full default wait; always restore it afterwards.
        driver.implicitly_wait(2)
        try:
            hasLink = NextPageButton.find_elements(By.TAG_NAME, 'a')
        finally:
            driver.implicitly_wait(self.IMPLICIT_WAIT)

        return not hasLink

    @staticmethod
    def _build_row(texts):
        """Map the span texts of one job row to a CSV row dict.

        Returns None when fewer than five texts are available, so callers
        can skip malformed rows instead of failing with IndexError.
        """
        if len(texts) < 5:
            return None
        return {
            "职位名称": texts[0],
            "公司名称": texts[1],
            "工作地点": texts[2],
            "薪资": texts[3],
            "发布时间": texts[4],
        }

    def handleOnePage(self, driver):
        """Collect one row dict for every job on the current result page.

        Each job's detail page is opened in a new window to read its URL and
        job description; the window is closed again before the next job.

        Returns:
            A list of dicts keyed by the CSV column names. Columns that
            could not be read are left out of the dict.
        """
        rows = []

        jobs = driver.find_elements(By.CSS_SELECTOR,
                                    '#resultList div[class=el]')
        for job in jobs:
            fields = job.find_elements(By.TAG_NAME, 'span')
            stringFilelds = [field.text for field in fields]
            print(stringFilelds)

            data = self._build_row(stringFilelds)
            if data is None:
                # Not a regular job row; nothing to click or record.
                continue

            # Remember the result window before opening the detail page.
            mainWindow = driver.current_window_handle

            # Clicking the first span opens the detail page.
            fields[0].click()

            newWindows = [handle for handle in driver.window_handles
                          if handle != mainWindow]
            if not newWindows:
                # No detail window opened: keep the summary row and make
                # sure the result window itself is never closed.
                rows.append(data)
                continue

            # A newly opened window is the last handle in the list.
            driver.switch_to.window(newWindows[-1])
            try:
                data["详细链接"] = driver.current_url
                info = driver.find_elements(By.CSS_SELECTOR,
                                            '.tCompany_main .job_msg')
                if len(info) == 1:
                    # Job description text.
                    data["职位信息"] = info[0].text
            finally:
                # Close the detail page and return to the result window.
                driver.close()
                driver.switch_to.window(mainWindow)

            rows.append(data)

        return rows
# Script entry: scrape at most 3 result pages for keyword 'python' in 上海.
QCWY(keyword='python', city='上海', maxpagenum=3).run()
按照下面的步骤安装一个练习自动化的网站系统
点击百度网盘链接 ,下载 白月SMS系统 压缩包 bysms.zip
下载解压bysms.zip后,进入bysms目录,双击运行runserver.bat 即可启动 白月SMS系统。 出现下面这样的信息
\bysms\bysms>bysms.exe runserver 80
Watching for file changes with StatReloader
Performing system checks...
System check identified no issues (0 silenced).
You have 2 unapplied migration(s). Your project may not work properly until you apply the migrations for app(s): auth.
Run 'python manage.py migrate' to apply them.
September 07, 2019 - 22:22:19
Django version 2.2.4, using settings 'bysms.settings'
Starting development server at http://127.0.0.1:80/
Quit the server with CTRL-BREAK.
注意:该窗口不能关闭,否则web 系统就会停止
下载 下面的附件(也可以通过上面的百度网盘下载)
执行 unzip bysms.zip 解压
然后,进入bysms目录,做如下事情
执行 pip3 install django 安装Django
执行 ./runserver.sh 启动服务
由于需要80端口权限,是sudo方式启动,需要输入当前用户密码
管理员登录网址为 http://127.0.0.1/mgr/sign.html
账号为 byhy 密码为 88888888
随后编写一个自动化程序,做到如下功能
使用管理员账号自动化登录网站,
在系统中添加3种药品,依次为
'青霉素盒装1','YP-32342341','青霉素注射液,每支15ml,20支装' '青霉素盒装2','YP-32342342','青霉素注射液,每支15ml,30支装' '青霉素盒装3','YP-32342343','青霉素注射液,每支15ml,40支装'
在系统中添加3个客户,依次为 '南京中医院1','2551867851','江苏省-南京市-秦淮区-汉中路-501' '南京中医院2','2551867852','江苏省-南京市-秦淮区-汉中路-502' '南京中医院3','2551867853','江苏省-南京市-秦淮区-汉中路-503'
进入订单管理界面,添加一个订单,
客户选择 南京中医院2 药品选择 青霉素盒装1 数量填入 100盒
# Root directory that is walked for .inc.md files; included file paths are
# resolved relative to it.
targetDir = r'hugo-demo/content'
# Matches a <jcy-include> directive; group 1 is the double-quoted file path.
pattern = r'<jcy-include>.*"(.+)"'
import os,sys
import re
# Replacement callback handed to re.sub
def my_replace(match):
    """Return the text that replaces one matched include directive.

    Group 1 of ``match`` is taken as a path relative to ``targetDir``; that
    file is read as UTF-8 and its whole content is returned.
    """
    include_path = os.path.join(targetDir, match.group(1))
    with open(include_path, 'r', encoding='utf8') as src:
        return src.read()
# Walk the tree below targetDir; dirpath is the directory currently visited.
# Every *.inc.md file that contains a <jcy-include> directive is expanded
# into a sibling *.ag.md file.
for (dirpath, dirnames, filenames) in os.walk(targetDir):
    for fn in filenames:
        if not fn.endswith('.inc.md'):
            continue

        fpath = os.path.join(dirpath, fn)
        with open(fpath, encoding='utf8') as f:
            content = f.read()

        # Files without any include directive produce no output file.
        if '<jcy-include>' not in content:
            continue

        newContent = re.sub(pattern, my_replace, content)

        # Swap only the trailing suffix. str.replace would also rewrite
        # '.inc.md' wherever it appears earlier in the path (for example in
        # a directory name).
        newFilePath = fpath[:-len('.inc.md')] + '.ag.md'

        # If the .ag.md file already exists with identical content, leave it
        # untouched so it is not rewritten needlessly.
        if os.path.exists(newFilePath):
            with open(newFilePath, encoding='utf8') as f:
                content2 = f.read()
            if content2 == newContent:
                continue

        # Otherwise (re)write the expanded file and report the source path.
        print(fpath)
        with open(newFilePath, "w", encoding='utf8') as f:
            f.write(newContent)
# 股票公司信息
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv
# Instantiate the driver and open the stock list page.
driver = webdriver.Chrome()
try:
    driver.get('http://quote.eastmoney.com/center/gridlist.html#hs_a_board')
    driver.implicitly_wait(6)

    # Output file for the scraped data; the with-block guarantees it is
    # flushed and closed even when a Selenium call raises.
    with open('shares.txt', mode='w', encoding='UTF8') as f:
        # mainWindow keeps the handle of the list window.
        mainWindow = driver.current_window_handle

        # Visit 10 list pages, appending the stock data of each to the file.
        for x in range(10):
            # All stock rows of the current page.
            rows = driver.find_elements(By.CSS_SELECTOR,
                                        '#table_wrapper-table tbody tr')

            # NOTE(review): only the first 2 rows of each page are processed;
            # presumably a limit left in for quick trial runs - confirm.
            for row in rows[:2]:
                # Stock code and stock name.
                demo = row.find_element(By.CSS_SELECTOR,
                                        'td:nth-child(2)').text
                name = row.find_element(By.CSS_SELECTOR,
                                        'td:nth-child(3)').text

                # Click the "data" link of the row; it opens a new window.
                row.find_element(
                    By.CSS_SELECTOR,
                    '.listview-col-Links [href*="data.eastmoney.com/stockdata"]'
                ).click()
                time.sleep(2)

                # Switch to the window that is not the list window.
                for handle in driver.window_handles:
                    if handle != mainWindow:
                        driver.switch_to.window(handle)
                        break
                else:
                    # No new window appeared: skip this row rather than
                    # closing the list window further down.
                    continue

                # Open the company overview tab.
                driver.find_element(By.CSS_SELECTOR,
                                    '.tabBox [data-href="gsgk"]').click()
                time.sleep(2)

                # Company profile text.
                info_corp = driver.find_element(By.CSS_SELECTOR,
                                                '.pannel.left').text

                # Write one line per stock.
                f.write(f'{demo} | {name} | {info_corp}\n')

                # Close the data window and return to the list window.
                driver.close()
                driver.switch_to.window(mainWindow)

            # Go to the next list page.
            driver.find_element(By.CSS_SELECTOR,
                                '.next.paginate_button').click()
            time.sleep(2)
finally:
    # Always shut the browser down.
    driver.quit()
sele-20191220f
参考答案