xiyaowong / spiders

Python爬虫,返回一定格式的信息,下载,使用flask提供简易api。抖音无水印、皮皮虾、快手、网易云音乐、qq音乐、咪咕音乐、荔枝FM音频、知乎视频、最右语音、视频、微博......
MIT License
627 stars 209 forks source link

修复bilibili视频下载 #7

Closed snltty closed 4 years ago

snltty commented 4 years ago
import re
import requests

def _absolute_url(raw: str) -> str:
    """Return *raw* with an ``https:`` scheme, unless it already has a scheme."""
    # The page normally embeds protocol-relative URLs ("//host/path"); only
    # those need the prefix. Prefixing an absolute URL would corrupt it.
    if raw.startswith(("http://", "https://")):
        return raw
    return "https:" + raw


def get(url: str) -> dict:
    """Extract cover image, video URL and title from a bilibili video page.

    The BV id is pulled out of *url*, the mobile video page for that id is
    fetched, and the page text is scanned with regular expressions.

    Args:
        url: Any text containing a BV id (``BV`` followed by at least one
            ASCII letter or digit), typically a bilibili video link.

    Returns:
        A dict that may contain:
          * ``"imgs"``: one-element list with the cover image URL,
          * ``"videos"``: one-element list with the video URL,
          * ``"videoName"``: the title text as it appears in the page,
          * ``"msg"``: an error message when no BV id was found in *url* or
            the page request did not return HTTP 200.
        Keys whose pattern did not match the page are simply absent.

    Raises:
        requests.RequestException: propagated unchanged from ``requests.get``
            (connection failure, timeout, ...).
    """
    data = {}
    headers = {
        "user-agent":
        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
        "Referer": "https://www.bilibili.com/",
    }

    # "+" rather than "*": a bare "BV" with nothing after it is not an id and
    # must be rejected here instead of being sent to the server.
    bvid_pattern = r'(BV[0-9a-zA-Z]+)'
    cover_pattern = r"image: '(.*?)',"
    video_pattern = r"video_url: '(.*?)',"
    title_pattern = r'title":"(.*?)",'

    bvid_match = re.search(bvid_pattern, url)
    if bvid_match is None:
        data["msg"] = "链接可能不正确,因为我无法匹配到av号"
        return data
    page_url = f"https://www.bilibili.com/video/{bvid_match.group(1)}"

    with requests.get(page_url, headers=headers, timeout=10) as rep:
        if rep.status_code != 200:
            data["msg"] = "获取失败"
            return data
        # Read the body while the response is still open.
        page = rep.text

    cover_match = re.search(cover_pattern, page)
    if cover_match is not None:
        # Drop everything from the first "@" on (suffix appended to the
        # image URL in the page).
        cover_url = cover_match.group(1).split('@', 1)[0]
        data["imgs"] = [_absolute_url(cover_url)]

    video_match = re.search(video_pattern, page)
    if video_match is not None:
        # Swap the Akamai mirror host for the bilivideo one.
        video_url = video_match.group(1).replace(
            'upos-hz-mirrorakam.akamaized.net',
            'upos-sz-mirrorkodo.bilivideo.com',
        )
        data["videos"] = [_absolute_url(video_url)]

    title_match = re.search(title_pattern, page)
    if title_match is not None:
        data["videoName"] = title_match.group(1)

    return data

if __name__ == "__main__":
    # Manual check: prompt for a link on stdin and print the extracted dict.
    link = input("url: ")
    result = get(link)
    print(result)
xiyaowong commented 4 years ago

这样的你可以提pr呀:smile:

snltty commented 4 years ago

extract.py 文件我也改了一些,不适合pr