ministep / SQL_DataAnalysis

SQL数据分析
8 stars 0 forks source link

疫情数据COVID-19 #88

Open kemistep opened 4 years ago

kemistep commented 4 years ago

疫情数据COVID-19

CSSEGISandData/COVID-19: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE

RamiKrispin/coronavirus: The coronavirus dataset

# Province-level regions to download; each name is appended to base_url below.
province = [
    "香港", "黑龙江", "台湾", "湖北", "上海", "广东", "内蒙古",
    "北京", "山西", "澳门", "浙江", "福建", "山东", "天津",
    "江苏", "四川", "河北", "云南", "陕西", "辽宁", "吉林",
    "重庆", "甘肃", "贵州", "湖南", "安徽", "宁夏", "海南",
    "青海", "新疆", "广西", "江西", "西藏", "河南",
]
# Endpoint prefix; the province name is concatenated directly after "province=".
base_url = (
    "https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list"
    "?province="
)
import requests 
import pandas as pd 
# Download the daily series for every province and stack them into one frame.
# Frames are collected first and concatenated once: calling pd.concat inside
# the loop recopies all previously accumulated rows on every iteration.
frames = []
for name in province:
    response = requests.get(base_url + name, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()["data"]
    # Skip empty/null payloads so no empty frame takes part in the concat.
    if data:
        frames.append(pd.DataFrame(data))
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])
kemistep commented 4 years ago

另外一种获取方式:

import requests
from lxml import etree
import json
from bs4 import BeautifulSoup
import pandas as pd
# Province-level regions to query; the download function iterates over these.
province = [
    "香港", "黑龙江", "台湾", "湖北", "上海", "广东", "内蒙古",
    "北京", "山西", "澳门", "浙江", "福建", "山东", "天津",
    "江苏", "四川", "河北", "云南", "陕西", "辽宁", "吉林",
    "重庆", "甘肃", "贵州", "湖南", "安徽", "宁夏", "海南",
    "青海", "新疆", "广西", "江西", "西藏", "河南",
]

# HTTP headers passed along with each request (a desktop Chrome User-Agent).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/76.0.3809.100 Safari/537.36 '
    ),
}
def get_covid_provice_daily(province):
    """Download per-province daily COVID-19 records into one DataFrame.

    One POST request is sent per province name; the ``data`` entries of
    every response are appended in request order.

    Args:
        province: Iterable of province names to query.

    Returns:
        A pandas DataFrame holding the nine selected fields, with the
        column labels renamed to their Chinese headings. An empty
        iterable yields an empty frame with the same columns.

    Raises:
        requests.HTTPError: If a response has an error status code.
        KeyError: If a response body has no ``data`` key.
    """
    url = 'https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list'
    records = []
    for name in province:
        response = requests.post(
            url=url,
            headers=headers,
            data={'province': name},
            timeout=30,  # without a timeout a stalled connection hangs forever
        )
        # Surface HTTP failures instead of parsing an error body as data.
        response.raise_for_status()
        payload = response.json()
        # A null "data" value contributes no rows rather than a TypeError.
        records.extend(payload['data'] or [])
    columns = ['date', 'country', 'province', 'confirm', 'dead', 'heal',
               'confirm_add', 'newHeal', 'newDead']
    pf = pd.DataFrame(records, columns=columns)
    columns_map = {
        'date': '日期',
        'country': '国家',
        # The selected field is 'province'; the former 'city' key matched
        # no column, leaving this one un-renamed.
        'province': '省',
        'confirm': '确诊人数',
        'dead': '死亡人数',
        'heal': '治愈人数',
        'confirm_add': '新增患者',
        'newHeal': '新治愈',
        'newDead': '新死亡',
    }
    return pf.rename(columns=columns_map)

# Fetch the records for every name in the module-level `province` list and
# keep the combined DataFrame in `df`.
df = get_covid_provice_daily(province)

参考:PythonSpider/main.py at 9007dac8801b06622faabb26f606aef8feb13360 · 321915514/PythonSpider