Open ainixiaguoyong opened 6 years ago
"""Baidu Music crawler: look up an artist's songs and download them as MP3s.

The script queries the music.baidu.com search page for a keyword, pulls the
song ids out of the result list, asks the ting.baidu.com "song.play" API for
each song's title and download link, and saves every song to
``<title>.mp3`` in the current directory.
"""
import json
import re  # regular expressions

import requests

# Search page, e.g. http://music.baidu.com/search?key=<artist name>
SEARCH_URL = 'http://music.baidu.com/search'
# JSONP endpoint that returns song metadata and the download link.
SONG_API_URL = 'http://tingapi.ting.baidu.com/v1/restserver/ting'
# Callback name the API wraps its JSON reply in: callback({...}).
JSONP_CALLBACK = 'jQuery17205500581185420972_1513324047403'
# Seconds to wait on any single HTTP request; without a timeout a stalled
# server would block the script forever.
REQUEST_TIMEOUT = 10


def _safe_filename(title, fallback):
    """Return *title* with characters that are illegal in file names replaced.

    Falls back to *fallback* when nothing usable is left of the title.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(title)).strip(' .')
    return cleaned or str(fallback)


def get_sids_by_name(name):
    """Return the song ids found on the search page for *name*.

    Args:
        name: Search keyword, typically an artist name.

    Returns:
        A list of song-id strings (digits only), in page order. The list is
        empty when the page contains no ``<ul>`` result list.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
    """
    response = requests.get(
        SEARCH_URL, params={'key': name}, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'

    # Greedy match from the first <ul to the last </ul> on the page.
    listing = re.search(r'<ul.*</ul>', response.text, re.S)
    if listing is None:
        return []

    # Song ids are embedded in the markup as: sid":551560464,
    return re.findall(r'sid":(\d+),', listing.group(0))


def get_mp3_by_id(song_id):
    """Download the song identified by *song_id* to ``<title>.mp3``.

    Args:
        song_id: Song id as returned by ``get_sids_by_name``.

    Raises:
        requests.RequestException: A request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: The API reply was not the expected JSONP document or
            carried no download link.
    """
    params = {
        'method': 'baidu.ting.song.play',
        'format': 'jsonp',
        'callback': JSONP_CALLBACK,
        'songid': song_id,
        '_': '1513324048127',
    }
    response = requests.get(
        SONG_API_URL, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Strip the JSONP wrapper: callback({...}) -> {...}. re.S keeps the match
    # working if the JSON payload spans several lines.
    wrapped = re.search(r'\((.*)\)', response.text, re.S)
    if wrapped is None:
        raise ValueError('no JSONP payload in reply for song %s' % song_id)
    data = json.loads(wrapped.group(1))

    try:
        title = data['songinfo']['title']
        mp3_url = data['bitrate']['show_link']
    except (KeyError, TypeError) as err:
        raise ValueError(
            'reply for song %s lacks title or link' % song_id) from err
    if not mp3_url:
        raise ValueError('song %s has no download link' % song_id)

    # Fetch the audio and check the status so an error page is never saved
    # as an .mp3 file.
    audio = requests.get(mp3_url, timeout=REQUEST_TIMEOUT)
    audio.raise_for_status()

    # Persist the binary audio data.
    with open('%s.mp3' % _safe_filename(title, song_id), 'wb') as f:
        f.write(audio.content)


def main():
    """Download every song found for the default search keyword."""
    for sid in get_sids_by_name('刘德华'):
        print(sid)
        try:
            get_mp3_by_id(sid)
        except (requests.RequestException, ValueError) as err:
            # One unavailable song should not abort the remaining downloads.
            print('skipped %s: %s' % (sid, err))


if __name__ == '__main__':
    main()
python 百度音乐 爬虫
主要也是喜欢爬虫这东西。起初听健旭老师讲的时候挺蒙圈的,但也挺喜欢这种高逼格的操作。然后就回去自己看、上网查资料,虽然到现在也基本不会,但是我模仿着网上的代码写了这段爬虫程序,也仔细研究了一遍,基本弄懂了其中的逻辑。
总结了以下几点,还望健旭老师指正: