下载电子工业出版社开放出来的电子书

hanxi commented 4 years ago

下载《电子工业出版社》开放的 pdf 电子书

用 chrome dev tools 调试了下，找到 pdf 是用 AES 加密过的。主要是在这个文件里断点：

https://yd.51zhy.cn/ebook/reader/static/js/0.f4c9028886a7b391c0df.1574068669001.js

搜索 AES.decrypt 跟踪到 pdf 解密流程是根据 this.v.authorKey 来解密的。如何计算 this.v.authorKey 呢？

继续搜索 authorKey 找到

r.authorKey = r.makeKey(r.readData.devicekey, e.Data.Key)

断点调试得到 devicekey 为： LQBhm1oQvo2pDGBO

Data.Key 为 9TWh4BIVxRJMrYnyw1wlx8Huz/eLPe6H0Y5VRHmOl90=

查看 makeKey 的实现如下：

// Derives the key used for PDF decryption by AES-decrypting `t` with the key `e`.
// At the call site quoted above, `e` is readData.devicekey and `t` is Data.Key.
// NOTE(review): `_.a` is presumably the bundled CryptoJS module — confirm.
// Returns whatever AES.decrypt returns; no string conversion is applied here.
makeKey: function(e, t) {
    e = e; // self-assignment with no effect, kept exactly as found in the bundle
    var n = _.a.enc.Utf8.parse(e); // parse the key string as UTF-8
    // Decrypt in ECB mode with PKCS#7 padding.
    return _.a.AES.decrypt(t, n, {
        mode: _.a.mode.ECB,
        padding: _.a.pad.Pkcs7
    })
},

用 openssl 命令就是这样的：

echo '9TWh4BIVxRJMrYnyw1wlx8Huz/eLPe6H0Y5VRHmOl90=' | openssl enc -d -aes-128-ecb -a -K 4c5142686d316f51766f32704447424f

其中 4c5142686d316f51766f32704447424f 是 LQBhm1oQvo2pDGBO 的 hex 形式。

算出结果为 DzglNMjfYapT9XGy, 再算出对应的 hex 为 447a676c4e4d6a665961705439584779

然后再从 chrome dev tools 的 network 里找到 pdf 的下载链接，右键拷贝出 curl 命令，然后把页码换成下面的变量 $i，批量下载。最后用 pdf 工具合并搞定，合并工具我选的 cpdf。

# Download every page as an AES-encrypted PDF, decrypt it, then merge all pages.
for ((i = 1; i <= 432; i++)); do
    echo "$i.pdf"

    # Download the encrypted page. "......" and "..." are placeholders for the
    # rest of the URL path and the extra options of the curl command copied
    # from the browser; fill them in before running.
    curl "https://file.51zhy.cn/files/encryptfiles/b2a/....../$i.pdf" ... -o "$i.pdf.aes"

    # Decrypt the page with the hex-encoded key computed earlier.
    openssl enc -d -aes-128-ecb -K 447a676c4e4d6a665961705439584779 -in "$i.pdf.aes" -out "$i.pdf"

done

# Merge the pages. The files are listed explicitly in numeric order because
# the glob *.pdf sorts lexicographically (1, 10, 100, ..., 2) and would
# shuffle the pages.
pages=()
for ((i = 1; i <= 432; i++)); do
    pages+=("$i.pdf")
done
cpdf "${pages[@]}" -o ../out.pdf

上面的方法仅用于学习目的，强烈建议不要用来做爬虫。

20200206更新：上面走了点弯路，用下面的 tampermonkey 脚本方法更方便。

// ==UserScript==
// @name         bridge.51zhy.cn
// @namespace    http://tampermonkey.net/
// @version      0.1
// @grant    GM_xmlhttpRequest
// @description  try to take over the world!
// @author       You
// @match        *://yd.51zhy.cn/*
// @grant    GM_setClipboard
// ==/UserScript==

// Book metadata and download details collected from the reader's API responses.
let allText = {};

/**
 * Wraps a value in single quotes so it can be pasted into a POSIX shell as one
 * word. Embedded single quotes are escaped as '\'' so a title containing a
 * quote cannot break out of the assignment.
 *
 * @param {*} value - Value to quote; converted with String().
 * @returns {string} The single-quoted shell word.
 */
function shellQuote(value) {
    return "'" + String(value).replace(/'/g, "'\\''") + "'";
}

/**
 * Hooks XMLHttpRequest.prototype.open to observe two API responses:
 *  - "Content/Detail": resets allText and records the book's ISBN and title.
 *  - "/content/authorize": decrypts Data.Key with localStorage.deviceKey
 *    (AES, ECB mode, PKCS#7 padding) into the hex-encoded author key, then
 *    copies shell variable assignments (authorKey, url, isbn, title) to the
 *    clipboard.
 * Every call is forwarded to the original open() with its arguments unchanged.
 */
(function (open) {
    XMLHttpRequest.prototype.open = function (...args) {
        // open() accepts URL objects as well as strings; normalize before matching.
        const requestUrl = String(args[1]);

        if (requestUrl.includes("/content/authorize")) {
            this.addEventListener("load", function () {
                let responseOBJ;
                try {
                    responseOBJ = JSON.parse(this.responseText);
                } catch (err) {
                    console.error("authorize response is not valid JSON", err);
                    return;
                }
                const deviceKey = localStorage.deviceKey;
                if (!responseOBJ || !responseOBJ.Data || !deviceKey) {
                    console.error("authorize response or localStorage.deviceKey is missing");
                    return;
                }

                allText.Url = responseOBJ.Data.Url;
                const keybase = CryptoJS.enc.Utf8.parse(deviceKey);
                // The encrypted key and the device key stay in locals, so they
                // never appear in the logged/copied object.
                allText.authorKey = CryptoJS.AES.decrypt(responseOBJ.Data.Key, keybase, {
                    mode: CryptoJS.mode.ECB,
                    padding: CryptoJS.pad.Pkcs7
                }).toString(CryptoJS.enc.Hex);
                console.log(allText);

                const ret = "authorKey=" + shellQuote(allText.authorKey) + "\n"
                  + "url=" + shellQuote(allText.Url) + "\n"
                  + "isbn=" + shellQuote(allText.Isbn) + "\n"
                  + "title=" + shellQuote(allText.Title) + "\n";
                console.log(ret);
                GM_setClipboard(ret);
                alert("Url copy to clipboard OK!");
            }, false);
        } else if (requestUrl.includes("Content/Detail")) {
            this.addEventListener("load", function () {
                let responseOBJ;
                try {
                    responseOBJ = JSON.parse(this.responseText);
                } catch (err) {
                    console.error("detail response is not valid JSON", err);
                    return;
                }
                // A new book detail starts a fresh record.
                allText = {};
                allText.Isbn = responseOBJ.Data.ExtendData.Isbn;
                allText.Title = responseOBJ.Data.Title;
            }, false);
        }
        return open.apply(this, args);
    };
})(XMLHttpRequest.prototype.open);

然后粘贴到脚本下载和解密

authorKey='594c4528626731776a79304867436243'
url='https://file.51zhy.cn/files/encryptfiles/59c/40181f28a7f5e76f2974cdb747fec68598844e26.pdf'
isbn='978-7-121-27637-8'
title='游戏中的数学'

# Download the encrypted PDF, then decrypt it with the hex author key.
# Expansions are quoted so a URL or title containing spaces or glob
# characters is passed through as a single argument.
curl "$url" --compressed -o "$title.pdf.aes"
openssl enc -d -aes-128-ecb -K "$authorKey" -in "$title.pdf.aes" -out "$title.pdf"

202002061714 更新：这东西见光就死了，现在 js 代码里的字符串被转成 hex 格式了，解密算法应该也修改了，断点调试还是一样的，只是搜索字符串麻烦了点，需要先转成字符串看看要搜的东西是啥，比如下面这个脚本是将 hex 转成字符串的：

# tostr.sh
# Decode a string of \xNN escapes (e.g. '\x6d\x6f\x64\x65') into plain text.
#
# tr translates single characters, not substrings, so it cannot turn each
# "\x" into a separator. Strip the "\x" prefixes with sed instead and let
# xxd decode the remaining plain hex digits in one pass.
# printf '%s' is used because some echo implementations interpret backslashes.
printf '%s' "$1" | sed 's/\\x//g' | xxd -r -p
echo

sh tostr.sh '\x6d\x6f\x64\x65'

cani1see commented 4 years ago

// ==UserScript==
// @name         bridge.51zhy.cn
// @namespace    http://tampermonkey.net/
// @version      0.1
// @grant    GM_xmlhttpRequest
// @description  try to take over the world!
// @author       You
// @match        *://yd.51zhy.cn/*
// @grant    GM_setClipboard
// ==/UserScript==

// Book metadata and download details collected from the reader's API responses.
let allText = {};

/**
 * Hooks XMLHttpRequest.prototype.open to observe two API responses:
 *  - "Content/Detail": resets allText and records the book's ISBN and title.
 *  - "/content/authorize": records Data.Key and Data.Url, then calls
 *    listCookies() to derive the author key and copy the result.
 * Every call is forwarded to the original open() with its arguments unchanged.
 */
(function (open) {
  XMLHttpRequest.prototype.open = function (...args) {
    // open() accepts URL objects as well as strings; normalize before matching.
    const requestUrl = String(args[1]);

    if (requestUrl.includes("/content/authorize")) {
      this.addEventListener("load", function () {
        const responseOBJ = JSON.parse(this.responseText);
        allText.Key = responseOBJ.Data.Key;
        allText.Url = responseOBJ.Data.Url;
        listCookies();
      }, false);
    } else if (requestUrl.includes("Content/Detail")) {
      this.addEventListener("load", function () {
        const responseOBJ = JSON.parse(this.responseText);
        // A new book detail starts a fresh record.
        allText = {};
        allText.Isbn = responseOBJ.Data.ExtendData.Isbn;
        allText.Title = responseOBJ.Data.Title;
      }, false);
    }
    return open.apply(this, args);
  };
})(XMLHttpRequest.prototype.open);

/**
 * Reads the device key from document.cookie, AES-decrypts allText.Key with it
 * (ECB mode, PKCS#7 padding) into the hex-encoded author key, and copies
 * allText as pretty-printed JSON to the clipboard.
 *
 * The cookie is selected by name: an exact "deviceKey" match is preferred,
 * falling back to the first cookie whose name contains "deviceKey". Cookie
 * values are never used for matching, and a value is everything after the
 * first "=", so values that themselves contain "=" are kept whole. Only one
 * cookie is processed, because allText.Key is removed after the first use.
 *
 * Side effects: sets allText.authorKey, deletes allText.Key and
 * allText.deviceKey, writes to the clipboard and shows an alert. When no
 * matching cookie exists it logs a warning and does nothing else.
 */
function listCookies() {
  const cookies = document.cookie.split(/;\s*/).map((pair) => {
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      return { name: pair.trim(), value: "" };
    }
    return {
      name: pair.slice(0, separatorIndex).trim(),
      value: pair.slice(separatorIndex + 1),
    };
  });

  const deviceKeyCookie =
    cookies.find((cookie) => cookie.name === "deviceKey") ||
    cookies.find((cookie) => cookie.name.includes("deviceKey"));

  if (!deviceKeyCookie) {
    console.warn("deviceKey cookie not found; nothing copied");
    return;
  }

  const keybase = CryptoJS.enc.Utf8.parse(deviceKeyCookie.value);
  allText.authorKey = CryptoJS.AES.decrypt(allText.Key, keybase, {
    mode: CryptoJS.mode.ECB,
    padding: CryptoJS.pad.Pkcs7
  }).toString(CryptoJS.enc.Hex);

  // Keep the encrypted key and the device key out of the copied JSON.
  delete allText.Key;
  delete allText.deviceKey;

  GM_setClipboard(JSON.stringify(allText, null, 2));
  alert("OK!");
}

hanxi commented 4 years ago

@cani1see 你的脚本不错， deviceKey 可以这样取 allText.deviceKey = localStorage.deviceKey;

需要考虑 https 页面的情况：

// @match        yd.51zhy.cn/*

原来可以下载完整的 pdf，我走了弯路。

hanxi commented 4 years ago

@cani1see 应该可以做成一个按钮，点击就下载 pdf 再解密再保存的。

cani1see commented 4 years ago

@hanxi 我没处理过下载文件再操作，另外我要URL放到nas上下载，你可以研究下

hanxi commented 4 years ago

@cani1see 如果是用 aria2 的话，可以参考 aria2 相关的油猴脚本。aria2 有接口开启下载的。

yucongo commented 4 years ago

用 openssl 命令就是这样的：

echo '9TWh4BIVxRJMrYnyw1wlx8Huz/eLPe6H0Y5VRHmOl90=' | openssl enc -d -aes-128-ecb -a -K 4c5142686d316f51766f32704447424f 其中 4c5142686d316f51766f32704447424f 是 LQBhm1oQvo2pDGBO 的 hex 形式。

win10 下用 git带的 openssl 上面的 echo 不能带 ‘ ’ 好像

感谢分享…… 我来学习一下

hanxi commented 4 years ago

https://github.com/shylocks/51zhy_pdf/blob/master/tutorial.MD

tuxzz commented 4 years ago

解密多费劲，不如直接hook pdfjs拦数据 https://gist.github.com/tuxzz/3bd4a9fbe53f3d8e11bd1745d7f75fbc

hanxi / blog

下载电子工业出版社开放出来的电子书 #44