Closed ShuiHuo closed 2 years ago
我刚和作者提到这个问题,你也注意到了,不管怎么修改,都只有默认清晰度
经由 @extremk 测试,现在移动端暂时仅支持标清
视频(虽然移动端“名义上”可以调节标清
与高清
,但事实上视频清晰度是一样的),这个现象同样也发生在了 PC 端,但 PC 端稍微好一些,能够返回标清
与高清
视频,所以临时做了点小改动,能够获取到 PC 端的视频资源
emmm,因为这个清晰度问题应该只是临时的嘛,我就不在 master
分支改动了,直接将下面那行里的 "clientType": 2
改为 "clientType": 1
就好
https://github.com/SigureMo/mooc-dl/blob/9e9a6755fb79cd43363ee2c5902c3342189e4042/mooc-dl.py#L109
其实比较简单,将mooc-dl.py改成下面这样
import json
import hashlib
import re
import os
import sys
import time
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from utils.crawler import Crawler
from utils.config import Config
from utils.thread import ThreadPool
from utils.common import Task, repair_filename, touch_dir, size_format
from utils.playlist import Dpl
from utils.downloader import FileManager
from utils.ffmpeg import FFmpeg
spider = Crawler()
spider.trust_env = False
VIDEO, PDF, RICH_TEXT = 1, 3, 4
COURSEWARE = {VIDEO: "Video", PDF: "PDF", RICH_TEXT: "Rich_text"}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36",
}
spider.headers.update(headers)
CONFIG = Config()
def login(username, password):
""" 登录获取 token """
pd = hashlib.md5()
pd.update(password.encode("utf-8"))
passwd = pd.hexdigest()
headers = {"edu-app-type": "android", "edu-app-version": "2.6.1"}
data = {"username": username, "passwd": passwd, "mob-token": ""}
res = spider.post("http://www.icourse163.org/mob/logonByIcourse", headers=headers, data=data)
result = res.json()
code = result.get("status").get("code")
if code == 0:
return result.get("results").get("mob-token")
elif code == 100:
print("密码错误!")
return None
else:
print("登录失败!")
return None
def get_courseinfo(tid, token):
""" 获取完整课程信息 """
data = {"tid": tid, "mob-token": token}
url = "https://www.icourse163.org/mob/course/courseLearn/v1"
res = spider.post(url, data=data)
return res.json()
def get_summary(url):
"""从课程主页面获取信息"""
url = url.replace("learn/", "course/")
res = spider.get(url).text
term_id = re.search(r'termId : "(\d+)"', res).group(1)
names = re.findall(r'name:"(.+)"', res)
course_name = " - ".join(names[1:])
# term_ids = re.findall(r'id : "(\d+)",\ncourse', res)
return term_id, repair_filename(course_name)
def parse_resource(resource, token):
""" 解析课件链接、参数 """
if resource[0] == VIDEO:
_, file_path, unit_id, content_id = resource
# get signature
data = {"bizType": 1, "mob-token": token, "bizId": unit_id, "contentType": 1}
while True:
res = spider.post(
"https://www.icourse163.org/mob/j/v1/mobileResourceRpcBean.getResourceToken.rpc", data=data
)
if res.json()["results"] is not None:
break
time.sleep(0.5)
signature = res.json()["results"]["videoSignDto"]["signature"]
# get urls
data = {"enVersion": 1, "clientType": 2, "mob-token": token, "signature": signature, "videoId": content_id}
res = spider.post("https://vod.study.163.com/mob/api/v1/vod/videoByNative", data=data)
videos = res.json()["results"]["videoInfo"]["videos"]
# select quality
resolutions = [3, 2, 1]
resolution = resolutions[CONFIG["resolution"] :] + list(reversed(resolutions[: CONFIG["resolution"]]))
for reso in resolution:
for video in videos:
if video["quality"] == reso:
video_url = video["videoUrl"]
break
else:
continue
break
# download subtitle
srt_info = res.json()["results"]["videoInfo"]["srtCaptions"]
if srt_info:
for srt_item in srt_info:
srt_path = os.path.splitext(file_path)[0] + "_" + srt_item["languageCode"] + ".srt"
srt_url = srt_item["url"]
spider.download_bin(srt_url, srt_path)
return video_url, file_path, None
elif resource[0] == PDF:
_, file_path, unit_id, content_id = resource
api_url = "http://www.icourse163.org/mob/course/learn/v1"
data = {"t": 3, "cid": content_id, "unitId": unit_id, "mob-token": token}
res = spider.post(api_url, data=data)
pdf_url = res.json()["results"]["learnInfo"]["textOrigUrl"]
return pdf_url, file_path, None
elif resource[0] == RICH_TEXT:
_, file_path, json_content = resource
api_url = "http://www.icourse163.org/mob/course/attachment.htm"
data = json_content
return api_url, file_path, data
def get_resource(term_id, token, file_types=[VIDEO, PDF, RICH_TEXT]):
""" 获取课件信息 """
resource_list = []
course_info = get_courseinfo(term_id, token)
for chapter_num, chapter in enumerate(course_info.get("results").get("termDto").get("chapters")):
for lesson_num, lesson in enumerate(chapter.get("lessons")):
for unit_num, unit in enumerate(lesson.get("units")):
if unit["contentType"] not in file_types:
continue
courseware_num = (chapter_num + 1, lesson_num + 1, unit_num + 1)
file_path = CONFIG["file_path_template"].format(
base_dir=base_dir,
sep=os.path.sep,
type=COURSEWARE.get(unit["contentType"], "Unknown"),
cnt_1=get_section_num(courseware_num, level=1),
cnt_2=get_section_num(courseware_num, level=2),
cnt_3=get_section_num(courseware_num, level=3),
chapter_name=repair_filename(chapter["name"]),
lesson_name=repair_filename(lesson["name"]),
unit_name=repair_filename(unit["name"]),
)
touch_dir(os.path.dirname(file_path))
if unit["contentType"] == VIDEO:
ext = ".mp4"
file_path += ext
playlist.write_path(file_path)
resource_list.append((VIDEO, file_path, unit["id"], unit["contentId"]))
elif unit["contentType"] == PDF:
file_path += ".pdf"
resource_list.append((PDF, file_path, unit["id"], unit["contentId"]))
elif unit["contentType"] == RICH_TEXT:
if unit.get("jsonContent"):
json_content = eval(unit["jsonContent"])
file_path = CONFIG["file_path_template"].format(
base_dir=base_dir,
sep=os.path.sep,
type="File",
cnt_1=get_section_num(courseware_num, level=1),
cnt_2=get_section_num(courseware_num, level=2),
cnt_3=get_section_num(courseware_num, level=3),
chapter_name=repair_filename(chapter["name"]),
lesson_name=repair_filename(lesson["name"]),
unit_name=repair_filename(os.path.splitext(json_content["fileName"])[0])
+ os.path.splitext(json_content["fileName"])[1],
)
touch_dir(os.path.dirname(file_path))
resource_list.append((RICH_TEXT, file_path, json_content))
return resource_list
def get_section_num(courseware_num, level=3, sep=".", template="{:d}"):
""" 根据等级获取课件的标号 """
return sep.join(list((map(lambda x: template.format(x), courseware_num[:level]))))
def merge(merge_list, ffmpeg=None):
""" 合并待合并列表 """
for i, merge_file in enumerate(merge_list):
print("merging {}/{}".format(i, len(merge_list)), end="\r")
file_path = merge_file["target"]
if ffmpeg is not None:
ffmpeg.join_videos(merge_file["segments"], file_path)
else:
with open(file_path, "wb") as fw:
for ts_path in merge_file["segments"]:
with open(ts_path, "rb") as fr:
fw.write(fr.read())
for ts_path in merge_file["segments"]:
os.remove(ts_path)
if __name__ == "__main__":
root = CONFIG["root"]
num_thread = CONFIG["num_thread"]
url = sys.argv[1]
# 登录并获取信息
token = login(CONFIG["username"], CONFIG["password"])
term_id, course_name = get_summary(url)
course_id = re.match(r"https?://www.icourse163.org/(course|learn)/\w+-(\d+)", url).group(2)
print(course_name)
print(course_id)
# 创建必要环境
base_dir = touch_dir(os.path.join(root, course_name))
playlist = Dpl(os.path.join(base_dir, "Playlist.dpl"))
# 获取资源列表
resource_list = get_resource(term_id, token, file_types=CONFIG["file_types"])
# 解析资源
resources = []
merge_list = []
for i, resource in enumerate(resource_list):
print("parse_resource {}/{}".format(i, len(resource_list)), end="\r")
url, file_path, params = parse_resource(resource, token)
# 过滤掉已经下载的资源
if os.path.exists(file_path) and not CONFIG["overwrite"]:
print("[info] {} already exists!".format(file_path))
continue
if ".m3u8" in url:
merge_file = {"target": file_path, "segments": []}
id = 0
m3u8_text = spider.get(url).text
for line in m3u8_text.split("\n"):
if line.endswith(".ts"):
ts_url = "/".join(url.split("/")[:-1]) + "/" + line
ts_path = "{}{:03d}.ts".format(file_path.rstrip(".mp4"), id)
resources.append((ts_url, ts_path))
id += 1
merge_file["segments"].append(ts_path)
merge_list.append(merge_file)
else:
if params is not None:
url += "?" + urlencode(params)
url = url.replace('_sd.mp4', '_hd.mp4')
resources.append((url, file_path))
# 将资源(片段)分发至线程池,并开始下载
manager = FileManager(num_thread, spider=spider, overwrite=CONFIG["overwrite"])
manager.dispense_resources(resources)
manager.run()
# 启动(主线程)监控器,等待下载完成
manager.monitoring()
# 合并所有 ts 片段
ffmpeg = None
if CONFIG["use_ffmpeg"]:
ffmpeg = FFmpeg()
merge(merge_list, ffmpeg=ffmpeg)
print("\nDone!")
添加一行url = url.replace('_sd.mp4', '_hd.mp4')
,可成事。
有个问题是,有一些比较老的视频貌似只有低清晰度,hd可能不存在
那就下低清晰度的喽,可以加个get(url)判断一下
简单测试了下,现在应该已经可以调节清晰度了(估计很久之前就可以了,但我好久没用过 mooc-dl 了),因此先 close 本 issue
测试如下:
python mooc-dl.py 'https://www.icourse163.org/learn/BUAA-1449777166?tid=1465293450' --range=1 -w
# 849.33 MB
python mooc-dl.py 'https://www.icourse163.org/learn/BUAA-1449777166?tid=1465293450' --range=1 -w -q 1
# 692.32 MB
python mooc-dl.py 'https://www.icourse163.org/learn/BUAA-1449777166?tid=1465293450' --range=1 -w -q 2
# 464.52 MB
需要清晰度选择功能
问题描述:当前通过 python mooc-dl.py https://www.icourse163.org/course/* 下载的课程清晰度只有标清,修改 config.json 的 resolution 没有作用。