jcyrss / baiyueheiyu

教程官网地址 www.byhy.net
5 stars 1 forks source link

happy coding for baiyuehy#2 #4

Open jcyrss opened 4 years ago

jcyrss commented 4 years ago

参考答案

from selenium import webdriver
import time
# 导入Select类
from selenium.webdriver.support.ui import Select

from lib import CHECK_POINT2

#  删除界面列出的所有条目,
#  比如: 订单,或者客户,或者药品
#  由于 代码逻辑相同,封装在函数 delAll 中
def delAll(wd):
    """Delete every entry listed on the current page, one at a time.

    The same page layout is shared by orders, customers and medicines, so
    this works for whichever list is currently shown.

    Args:
        wd: the WebDriver whose current page shows the list to empty.
    """
    selector = '.search-result-item-actionbar label:nth-last-child(1)'

    def findDeleteButtons():
        # Shorten the implicit wait for this lookup only, so that an empty
        # list is detected quickly, then restore the 5-second wait.
        wd.implicitly_wait(1)
        found = wd.find_elements_by_css_selector(selector)
        wd.implicitly_wait(5)
        return found

    # The buttons are looked up again after every deletion because the page
    # re-creates its elements each time an entry is removed.
    remaining = findDeleteButtons()
    while remaining:
        # Click the first delete button and confirm the alert that pops up
        remaining[0].click()
        wd.switch_to.alert.accept()

        # Give the page one second to refresh
        time.sleep(1)

        remaining = findDeleteButtons()

#  添加 客户 或者 药品
#  由于 代码逻辑相同,封装在函数中
def addCustomerOrMedicion(field1, field2, field3, driver=None):
    """Create one customer or one medicine through the add form of the current page.

    Both pages expose the same form with three text inputs, so one helper
    serves both.

    Args:
        field1: text for the first input.
        field2: text for the second input.
        field3: text for the third input.
        driver: WebDriver to operate on.  Defaults to the module-level ``wd``
            so that existing three-argument calls keep working.

    Raises:
        IndexError: if the form exposes fewer than three ``.form-control`` inputs.
    """
    if driver is None:
        driver = wd

    # Open the add form
    driver.find_element_by_class_name('glyphicon-plus').click()

    # The form-control elements inside the add area are the three inputs
    inputs = driver.find_elements_by_css_selector('.add-one-area .form-control')
    for index, text in enumerate((field1, field2, field3)):
        inputs[index].send_keys(text)

    # The first btn-xs inside the add area is the create button
    driver.find_element_by_css_selector('.add-one-area .btn-xs').click()

    # Wait for the page to refresh and settle
    time.sleep(1)

# Start a Chrome session with a default implicit wait of 5 seconds
wd = webdriver.Chrome()
wd.implicitly_wait(5)

# Open the login page of the locally hosted management site
wd.get('http://127.0.0.1/mgr/sign.html')

# Locate the credential inputs by ID and type the user name and password
wd.find_element_by_id('username').send_keys('byhy')
wd.find_element_by_id('password').send_keys('88888888')

# Click the login button
wd.find_element_by_tag_name('button').click()

# ****  Step 1: initialisation ****
# Delete all orders, customers and medicines that already exist in the system

# Open the order menu (4th sidebar item) and delete all orders
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(4)').click()
delAll(wd)

# Open the medicine menu (3rd sidebar item) and delete all medicines
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(3)').click()
delAll(wd)

# Open the customer menu (2nd sidebar item) and delete all customers
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(2)').click()
delAll(wd)

# ****  Step 2: add customers, medicines and an order

# ****   Add customers and medicines *****

# Open the customer menu (2nd sidebar item) and add three customers
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(2)').click()

addCustomerOrMedicion('南京中医院1','2551867851','江苏省-南京市-秦淮区-汉中路-501')
addCustomerOrMedicion('南京中医院2','2551867852','江苏省-南京市-秦淮区-汉中路-502')
addCustomerOrMedicion('南京中医院3','2551867853','江苏省-南京市-秦淮区-汉中路-503')

# Open the medicine menu (3rd sidebar item) and add three medicines
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(3)').click()
addCustomerOrMedicion('青霉素盒装1','YP-32342341','青霉素注射液,每支15ml,20支装')
addCustomerOrMedicion('青霉素盒装2','YP-32342342','青霉素注射液,每支15ml,30支装')
addCustomerOrMedicion('青霉素盒装3','YP-32342343','青霉素注射液,每支15ml,40支装')

# ****   Add an order *****

# Open the order menu (4th sidebar item)
wd.find_element_by_css_selector('.sidebar-menu li:nth-of-type(4)').click()

# Click the add button to open the add form
wd.find_element_by_class_name('glyphicon-plus').click()

# Type the order name into the first form-control of the add area
name = wd.find_element_by_css_selector('.add-one-area .form-control')
name.send_keys('南中订单1')

# The add area contains two <select> elements
selectElements = wd.find_elements_by_css_selector('.add-one-area select')

# First select: choose the customer by its visible text
Select(selectElements[0]).select_by_visible_text("南京中医院2")
# Second select: choose the medicine by its visible text
Select(selectElements[1]).select_by_visible_text("青霉素盒装1")

# Type the quantity into the numeric input
wd.find_element_by_css_selector(
    '.add-one-area input[type=number]')\
    .send_keys('100')

# The first btn-xs inside the add area is the create button; click it
wd.find_element_by_css_selector('.add-one-area .btn-xs').click()

# ****  Step 3: check the order that was just added *****

# Collect the first eight span/p elements found inside the result items;
# the expected list below shows that these make up the topmost order entry
items = wd.find_elements_by_css_selector(
    'div.search-result-item span,div.search-result-item p')[:8]

texts = [item.text for item in items]
print(texts)

# The order date is precise to the second and cannot be compared literally,
# so take it out of the list (index 3) and check it separately below
orderTime = texts.pop(3)

# Expected remaining texts
expected = [
'订单:',
'南中订单1',
'日期:',
'客户:',
'南京中医院2',
'药品:',
'青霉素盒装1 * 100'
]

CHECK_POINT2('药品信息和添加内容一致', texts == expected)

# Convert the displayed order time to an integer timestamp for comparison
# (time.mktime interprets the parsed value as local time)
intOrderTime = int(time.mktime(time.strptime(orderTime, '%Y-%m-%d %H:%M:%S')))
curTime = int(time.time())
# The absolute difference from the current time must be below 100 seconds
deviation = abs(intOrderTime-curTime)
print(deviation)
CHECK_POINT2('订单时间误差', deviation<100)

# Close the browser and end the WebDriver session
wd.quit()
jcyrss commented 4 years ago

补充练习 20191224a 01.zip

jcyrss commented 4 years ago

可以通过搜狐提供的接口获取股票历史行情数据。以股票上海机场(600009)为例,获取2018年7月16日到20日的日线数据。

在浏览器中输入 http://q.stock.sohu.com/hisHq?code=cn_600009&start=20180716&end=20180720&stat=1&order=D&period=d&callback=historySearchHandler&rt=json

各参数的含义为:

code:股票代码,以cn_开头,
start:起始时间,
end:截止时间,
stat:统计信息,为0的时候就不返回stat对应的值了,
order:排序方法(D表示降序排,A表示升序排),
period:数据周期(d表示日线,m表示月线,w表示周线)。

接口就会返回这些数据,格式是 JSON,如下:

[{
"status":0,
"hq":[
["2018-07-20","61.22","61.83","0.61","1.00%","61.22","62.69","57637","35856.55","0.53%"],
["2018-07-19","63.00","61.22","-1.54","-2.45%","60.27","63.19","61372","37656.60","0.56%"],
["2018-07-18","62.28","62.76","0.48","0.77%","61.75","63.80","48778","30629.53","0.45%"],
["2018-07-17","62.70","62.28","-0.25","-0.40%","61.16","62.70","48519","29986.43","0.44%"],
["2018-07-16","62.00","62.53","0.80","1.30%","62.00","64.30","76005","47872.05","0.70%"]
],
"code":"cn_600009",
"stat":["累计:","2018-07-16至2018-07-20","0.10","0.16%",60.27,64.3,292311,182001.16,"2.68%"]
}]

返回的数据以这条为例

"2018-07-20","61.22","61.83","0.61","1.00%","61.22","62.69","57637","35856.55","0.53%"

分别表示日期,开盘,收盘,涨跌,涨幅,最低,最高,成交量,成交额,换手。

jcyrss commented 4 years ago
import time,csv
import requests,re

class QCWY:
    """Crawl 51job search results for one keyword in one city into a CSV file.

    Args:
        keyword: search keyword, inserted into the search URL as given.
        city: city name; it is translated to a 51job area code before searching.
        maxpagenum: maximum number of result pages to download.
    """

    # Column order of the CSV file.  The parser does not fill '详细链接' and
    # '职位信息'; csv.DictWriter writes missing keys as empty cells.
    _csvFields = ['职位名称',
                  '详细链接',
                  '公司名称',
                  '工作地点',
                  '薪资',
                  '发布时间',
                  '职位信息']

    # Hidden input in the result page that carries the total page count
    _totalPagePattern = r'id=\"hidTotalPage\".*?value=\"(.*?)\"'

    # One match per job entry; compiled once here instead of once per page
    _jobPattern = re.compile(
        r'<div class=\"el\">.*?class=\"t1.*?\">.*?<a.*?>(?P<job>.*?)</a>.*?class=\"t2\"><a.*?>(?P<company>.*?)</a>.*?class=\"t3\">(?P<addr>.*?)</span>.*?class=\"t4\">(?P<salary>.*?)</span>.*?class=\"t5\">(?P<date>.*?)</span>',
        re.DOTALL)

    def __init__(self,keyword,city,maxpagenum):
        self.keyword = keyword
        self.city = city
        self.maxpagenum = maxpagenum

    def run(self):
        """Download the result pages and write every job found to the CSV file.

        Stops after ``maxpagenum`` pages or after the last page reported by
        the site, whichever comes first.

        Raises:
            IndexError: if the first page does not contain the total-page field.
            SystemExit: (from getAreaCode) if the city is unknown.
        """
        areaCode = self.getAreaCode()
        totalPage = None

        with open(f'前途无忧招聘_关键词_{self.keyword}_城市_{self.city}.csv',
                  'w', newline='', encoding='gbk') as f:

            f_csv = csv.DictWriter(f, self._csvFields)
            f_csv.writeheader()

            for pageNo in range(1,self.maxpagenum+1):

                url = self._buildUrl(areaCode, pageNo)
                print(url)

                res = requests.get(url)
                resBody = res.content.decode('gbk')

                # Read the total number of pages from the first response only
                if totalPage is None:
                    tp = re.findall(self._totalPagePattern,resBody)[0]
                    print(f'总共 {tp} 页')
                    totalPage = int(tp)

                for row in self._parseRows(resBody):
                    f_csv.writerow(row)

                # Stop at the last page; >= also stops when the site reports 0 pages
                if pageNo >= totalPage:
                    break

    def _buildUrl(self, areaCode, pageNo):
        """Return the search URL of result page *pageNo* for *areaCode*."""
        # The number right before '.html' is the page number.  It used to be
        # hard-coded to 1, so every iteration downloaded the first page again.
        return f'https://search.51job.com/list/{areaCode},000000,0000,00,9,99,{self.keyword},2,{pageNo}.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='

    def _parseRows(self, resBody):
        """Yield one CSV row dict for every job entry found in *resBody*."""
        for match in self._jobPattern.finditer(resBody):
            yield {
                "职位名称": match.group('job').strip(),
                "公司名称": match.group('company'),
                "工作地点": match.group('addr'),
                "薪资": match.group('salary'),
                "发布时间": match.group('date'),
            }

    def getAreaCode(self):
        """Return the 51job area code of ``self.city``.

        Packet capture showed that the code table is served from
        https://js.51jobcdn.com/in/js/2016/layer/area_array_c.js

        Raises:
            SystemExit: with code 2 if the city is not in the table.
        """
        res = requests.get('https://js.51jobcdn.com/in/js/2016/layer/area_array_c.js')
        # Keep only the literal between 'area=' and the next ';'
        part1 = res.text.split('area=')[1].split(';')[0]
        # NOTE(review): eval() executes whatever the remote server returns.
        # Presumably the payload is a plain dict literal, in which case
        # ast.literal_eval would be the safe replacement -- confirm and switch.
        code2area = eval(part1)

        # Build the reverse lookup: area name -> area code
        area2code = {v: k for k, v in code2area.items()}

        if self.city not in area2code:
            print(f'查无此地: {self.city}')
            # Same exit status as before, without relying on the site-provided exit()
            raise SystemExit(2)

        return area2code[self.city]

# Crawl at most two result pages of 'python' jobs in Shanghai
QCWY(keyword='python', city='上海', maxpagenum=2).run()
jcyrss commented 4 years ago

crawlStats.zip

jcyrss commented 4 years ago

点击这个链接 http://cdn1.python3.vip/files/py/phone.zip

下载zip包 ,解压后结构如下


phone
  ├─apple
  └─samsung
      ├─note
      └─s

这个目录里面对应了苹果、三星手机 的价格。
在相应目录里面,包含对应手机价格的python文件。
请同学们在维持目录结构不变的前提下,把这个目录结构做成名为 phone 的 python 包。

然后,自己写一个Python程序调用 那个python包里面的每个
模块文件(共四个)里面的askPrice 函数,显示每种手机的价格

同学们先将那个phone包,和自己写的调用程序文件放在同一个目录下,
运行调用程序,显示各种手机价格

同学们再将那个phone包,和自己写的调用程序文件放在不同的目录下,
通过设置 sys.path 或者 环境变量PYTHONPATH,
来保证可以找到phone包,并成功调用。
jcyrss commented 4 years ago
import requests,  csv
from datetime import datetime

def stockinfo(code,startdate,enddate):
    """Download daily history for one stock and save date and closing price to stock.csv.

    Args:
        code: stock code sent as the ``code`` query parameter.
        startdate: value sent as the ``start`` query parameter.
        enddate: value sent as the ``end`` query parameter.
    """
    query = {'code': code, 'start': startdate, 'end': enddate}
    response = requests.get("http://q.stock.sohu.com/hisHq", params=query)
    dailyRecords = response.json()[0]['hq']

    # In each daily record, item 0 is the date and item 2 the closing price.
    # All rows are built before the file is opened, so a malformed record
    # fails before stock.csv is touched.
    rows = [[record[0], float(record[2])] for record in dailyRecords]

    with open('stock.csv', 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows(rows)

# Fetch stock cn_600009 from 2018-07-20 to 2018-10-20
stockinfo('cn_600009', '20180720', '20181020')
jcyrss commented 4 years ago
import requests,json,time
from pprint import pprint
import datetime
import xlwt

# Build the list of every date in 2019 up front; each one is used as a request parameter later
d1 = datetime.date(2019, 1, 1)
d2 = datetime.date(2019, 12, 31)
days = [d1 + datetime.timedelta(days=x) for x in range((d2-d1).days + 1)]

# for day in days:
#     print(day.strftime('%Y%m%d'))

# 爬取信息存入文件文件

def getAllInfo2Text():
    """Fetch the pilot plan of every day in ``days`` and append it to info.txt.

    Each day with records is written as one line holding the JSON-encoded
    record list.  Days without records write nothing.
    """
    with open('info.txt','w',encoding='utf8') as f:
        for day in days:

            dayStr = day.strftime('%Y-%m-%d')
            print( f"抓取数据 {dayStr} ..." ,end='')

            # A fresh submitToken is requested before every data request
            res = requests.post('https://app2.szpilot.com.cn/szg/token/submitToken',
                          data='{}')
            submitToken = res.json()['submitToken']

            # Replay the request parameters observed by packet capture
            webParam = json.dumps({"dutyId":"-1","jobAreaCode":"","showPresetPilot":"0","pageIndex":1,"date":f"{dayStr} 00:00:00","pageSize":900})

            print(webParam)

            # NOTE(review): the session cookie is hard-coded; presumably it
            # expires and has to be refreshed before running -- confirm.
            res = requests.post('https://app2.szpilot.com.cn/szg/pilotPlan/sendPilotPlan',
                          data={
                              'WEB_PARAM' : webParam,
                              'submitToken' : submitToken
                          },
                          headers={
                              'Cookie':'JSESSIONID=FB326C67F0874655F0EB19A5F6AF095B-n2.22jvm8081; Path=/szg; Secure; HttpOnly',
                              'Origin': 'https://app2.szpilot.com.cn',
                              'Referer': 'https://app2.szpilot.com.cn/szg_admin/',
                          },
                         )

            ret = res.json()
            print(ret)

            if ret['pageDatas']['totalCount'] > 0:
                print('ok')
                # Store the raw records as one line of text
                f.write(json.dumps(ret['pageDatas']['list'])+'\n')
            else:
                print('没有记录')

            # Requests that come too fast are treated as a crawler and refused.
            # The pause now also applies to days without records, which used
            # to skip it.
            time.sleep(1)

# 再从文件中分析数据,转存入 excel
def _cellValue(line, field):
    """Return the spreadsheet cell value of *field* for one record dict *line*."""
    # The thruster column combines two record keys; each falls back to '无'
    if field == 'headThruster:tailThruster':
        hvalue = line.get('headThruster', '无')
        tvalue = line.get('tailThruster', '无')
        return f"首:{hvalue} 尾:{tvalue}"

    # Fields missing from the record are left blank.  This now includes
    # 'dynamicTime', which used to raise KeyError when absent.
    if field not in line:
        return ''

    # The plan time is divided by 1000 before conversion (presumably epoch
    # milliseconds -- confirm) and formatted as local time
    if field == 'dynamicTime':
        return time.strftime('%Y%m%d %H:%M', time.localtime(int(line['dynamicTime'] / 1000)))

    return line[field]


# Read the crawled data back from the text file and store it in an Excel workbook
def analyzeFile2Excel():
    """Convert info.txt (one JSON list of records per line) into 引航表.xls.

    Row 0 of the sheet holds the column headings; every record becomes one
    following row.

    Raises:
        json.JSONDecodeError: if a line of info.txt is not valid JSON.
    """
    # Maps record keys to column headings; the dict order is the column order
    fieldMap = {
        'dynamicTime' : '计划时间',
        'shipNameCn' : '中文船名',
        'shipNameEn' : '英文船名',
        'shipFlag' : '国籍(地区)',
        'shipLength' : '船长',
        'draft' : '吃水',
        'dynamicName' : '动态',
        'startBerth' : '起点泊位',
        'endBerth' : '终点泊位',
        'master' : '主引',
        'assistant' : '副引',
        'assistant2' : '其它引水',
        'orgShort' : '代理',
        'dTelephone' : '代理电话',
        '+++1' : '交通船',
        'channelName' : '航道',
        'headThruster:tailThruster' : '侧推',
        'remarks' : '备注',
    }

    # Create the workbook and add a sheet named '引航'
    book = xlwt.Workbook()
    sh = book.add_sheet('引航')

    # Write the heading row
    for column, heading in enumerate(fieldMap.values()):
        sh.write(0, column, heading)

    with open('info.txt','r',encoding='utf8') as f:
        row = 0
        # Each line of the file holds the records of one day
        for oneline in f:
            oneday = json.loads(oneline)

            for line in oneday:
                row += 1
                for column, field in enumerate(fieldMap):
                    value = _cellValue(line, field)

                    # Echo the plan time of every record as progress output
                    if field == 'dynamicTime':
                        print(value)

                    sh.write(row, column, value)

    # Save the workbook
    book.save('引航表.xls')

# Run both stages in order: crawl into info.txt, then convert that file to Excel
getAllInfo2Text()
analyzeFile2Excel()

print('\n\n === 完成 ==== \n\n')