raawaa / jav-scrapy

批量抓取AV磁链或封面的苦劳力
1.05k stars 211 forks source link

TypeError: Cannot read properties of null (reading '1') #56

Open rbridomzmr opened 2 years ago

rbridomzmr commented 2 years ago

TypeError: Cannot read properties of null (reading '1')
    at parse (D:!Software\jav-scrapy-0.7.0\jav.js:204:20)
    at Request._callback (D:!Software\jav-scrapy-0.7.0\jav.js:235:28)
    at Request.self.callback (D:!Software\jav-scrapy-0.7.0\node_modules\request\request.js:185:22)
    at Request.emit (node:events:390:28)
    at Request.<anonymous> (D:!Software\jav-scrapy-0.7.0\node_modules\request\request.js:1154:10)
    at Request.emit (node:events:390:28)
    at IncomingMessage.<anonymous> (D:!Software\jav-scrapy-0.7.0\node_modules\request\request.js:1076:12)
    at Object.onceWrapper (node:events:509:28)
    at IncomingMessage.emit (node:events:402:35)
    at endReadableNT (node:internal/streams/readable:1343:12)

这个应该是没有抓到图片导致的吧,还有好像没有不抓图像的选项

jeffxinzhe commented 2 years ago

我这也报了同样的错误,一般是在大批量抓取之后报的,我把这段代码改了,加了非空判断和改了取值,将就着用,等大神更新。

/**
 * Extracts the `gid`, `uc` and `img` values assigned inside an inline script.
 *
 * The script text is searched for assignments of the form `gid = <digits>`,
 * `uc = <digits>` and `img = '<url>'`. An `img` value that does not start with
 * "http" (case-insensitive) is resolved against the file-level `baseUrl`.
 *
 * Any of the three assignments can be absent (the original report was a
 * "Cannot read properties of null (reading '1')" TypeError raised here), so
 * every match is checked before its capture group is read.
 *
 * @param {?string} script - Text of the script element; may be null when the
 *     element was not found on the page.
 * @returns {?{gid: string, img: string, uc: string, lang: string}} The parsed
 *     values, or `null` when `script` is not a string or any of the three
 *     assignments is missing.
 */
function parse(script) {
    if (typeof script !== 'string') {
        return null;
    }

    // Fresh, non-global literals: no `lastIndex` state is carried between calls.
    const gidMatch = /gid\s+=\s+(\d+)/.exec(script);
    const ucMatch = /uc\s+=\s+(\d+)/.exec(script);
    const imgMatch = /img\s+=\s+'([^']+)'/.exec(script);
    if (gidMatch === null || ucMatch === null || imgMatch === null) {
        return null;
    }

    const imgPath = imgMatch[1];
    // Absolute URLs are used as-is; anything else is treated as site-relative.
    const img = /^http/i.test(imgPath)
        ? imgPath
        : new URL(imgPath, baseUrl).toString();

    return {
        // Group 1 always exists once the pattern has matched, so it can be
        // read directly instead of slicing the full match text.
        gid: gidMatch[1],
        img: img,
        uc: ucMatch[1],
        lang: 'zh'
    };
}

jeffxinzhe commented 2 years ago

/**
 * Fetches one item page, extracts its metadata and starts the magnet-link
 * (and, unless disabled, snapshot) downloads for it.
 *
 * The item id is the last path segment of `link`. If both `<id>.jpg` and
 * `<id>.txt` already exist in the output directory the item is skipped.
 * `callback` is invoked on every path that ends here (skip, request error,
 * unparsable page); on the success path it is handed to `getItemMagnet`.
 *
 * @param {string} link - URL of the item page.
 * @param {number} index - Position of the item in the caller's list (unused).
 * @param {Function} callback - Completion callback for the caller's iterator.
 */
function getItemPage(link, index, callback) {
    const fanhao = link.split('/').pop();
    const coverFilePath = path.join(output, fanhao + '.jpg');
    const magnetFilePath = path.join(output, fanhao + '.txt');
    if (hasLimit) {
        count--;
    }

    // Explicit existence check instead of try/catch around accessSync, so an
    // exception thrown by `callback` itself is no longer swallowed and
    // mistaken for "file missing".
    if (fs.existsSync(coverFilePath) && fs.existsSync(magnetFilePath)) {
        console.log(('[' + fanhao + ']').yellow.bold.inverse + ' ' + 'Alreday fetched, SKIP!'.yellow);
        return callback();
    }

    request.get(link, function (err, res, body) {
        if (err) {
            console.error(('[' + fanhao + ']').red.bold.inverse + ' ' + err.message.red);
            return callback(null);
        }

        const $ = cheerio.load(body);
        const script = $('script', 'body').eq(2).html();
        const meta = parse(script);

        // `parse` yields nothing when the page lacks the expected script
        // variables. The callback must still fire, otherwise the caller's
        // iteration never completes.
        if (meta == null) {
            console.error(('[' + fanhao + ']').red.bold.inverse + ' ' + 'Failed to parse page metadata, SKIP!'.red);
            return callback(null);
        }

        meta.category = [];
        $('div.col-md-3 > p').each(function (i, e) {
            const text = $(e).text();
            if (text.includes('發行日期:')) {
                meta.date = text.replace('發行日期: ', '');
            } else if (text.includes('系列:')) {
                meta.series = text.replace('系列:', '');
            } else if (text.includes('類別:')) {
                // Genre spans without an `onmouseover` attribute are categories.
                $('div.col-md-3 > p > span.genre').each(function (idx, span) {
                    const $span = $(span);
                    if (!$span.attr('onmouseover')) {
                        meta.category.push($span.text());
                    }
                });
            }
        });

        // Extract actresses: genre spans that do carry `onmouseover`.
        meta.actress = [];
        $('span.genre').each(function (i, e) {
            const $e = $(e);
            if ($e.attr('onmouseover')) {
                meta.actress.push($e.find('a').text());
            }
        });

        // Extract the title.
        meta.title = $('h3').text();

        // Fetch the magnet links; `callback` is passed on to getItemMagnet.
        getItemMagnet(link, meta, callback);

        if (!program.nopic) {
            // Collect every snapshot link.
            const snapshots = [];
            $('a.sample-box').each(function (i, e) {
                snapshots.push($(e).attr('href'));
            });
            getSnapshots(link, snapshots, meta);
        }
    });
}