tiancheng91 / collection

笔记
https://github.com/tiancheng91/collection/issues
22 stars 1 forks source link

chrome headless #1

Open tiancheng91 opened 6 years ago

tiancheng91 commented 6 years ago

安装

troubleshooting

[推荐]snap安装

snap install chromium && apt install fonts-wqy-microhei

官方deb包安装

# Install the runtime library and fonts first (fonts-wqy-microhei is WenQuanYi Micro Hei, a CJK font).
apt-get install -y libappindicator1 fonts-liberation fonts-wqy-microhei
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg may stop on unmet dependencies; the apt-get -f install that follows pulls them in.
dpkg -i google-chrome*.deb
apt-get -f install -y

# Make the installed binary reachable as plain `chrome`.
ln -s /opt/google/chrome/google-chrome /usr/bin/chrome

# Skip the Chromium download that the puppeteer npm package performs on install.
# Exported so that child processes (npm) actually see the variable.
export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true

docker启动

docker container run -d -p 9222:9222 tiancheng91/chrome-headless --remote-debugging-address=0.0.0.0 --remote-debugging-port=9222

puppeteer

# System packages installed before puppeteer (presumably the shared libraries Chromium links against — confirm against the puppeteer troubleshooting guide).
apt-get install gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
# Switch the global npm to v4; the EACCES note further down reports that the older npm avoided the error.
npm i npm@4 -g
# Install puppeteer globally.
npm -g install puppeteer

使用

puppeteer 启动

// Launch options used in these notes (presumably passed to puppeteer.launch — confirm).
launch({
        // Left empty here; presumably the path of the Chrome binary to run — TODO confirm and fill in.
        executablePath: '',
        // Command-line flags for the browser; this one turns the sandbox off.
        args: ['--no-sandbox'],
})

headless中使用的默认参数

# Start headless Chromium with DevTools remote debugging on port 9222 and the sandbox disabled;
# the remaining flags switch off the background features named in each flag.
chromium --headless \
  --remote-debugging-port=9222 --no-sandbox \
  --disable-background-networking \
  --disable-background-timer-throttling \
  --disable-client-side-phishing-detection \
  --disable-default-apps \
  --disable-dev-shm-usage \
  --disable-extensions \
  --disable-hang-monitor \
  --disable-popup-blocking \
  --disable-prompt-on-repost \
  --disable-sync \
  --disable-translate \
  --metrics-recording-only \
  --no-first-run \
  --safebrowsing-disable-auto-update
tiancheng91 commented 6 years ago

npm EACCES 错误

https://docs.npmjs.com/getting-started/fixing-npm-permissions

// 不知道什么鬼,换老版本npm没问题 npm i npm@4 -g

tiancheng91 commented 6 years ago

链式调用

// pup-chain
const ObjProperties = require("simple-property-retriever");
const pWaterfall = require('p-waterfall');

/**
 * Records calls to a target object's prototype methods and replays them later.
 *
 * Every own non-enumerable member of the target's prototype gets a same-named
 * method on the chain. Calling it does not invoke the target; it queues the
 * call and returns the chain so calls can be strung together. `run()` then
 * executes the queued calls in order.
 */
class Chain {
  /**
   * @param {object} initiator - Target object whose prototype methods become chainable.
   */
  constructor(initiator) {
    this.initiated = true; // Kept for backward compatibility; nothing in this class reads it.
    this.chain = []; // Queued zero-argument thunks, consumed by run().
    this.ref = initiator;

    // Names a wrapper must never replace: `constructor` would make
    // `chain.constructor !== Chain`, and the others are the chain's own API,
    // so a target method with the same name would break queuing or running.
    const reserved = new Set(['constructor', 'run', 'chain', 'ref', 'initiated']);

    // Object.getPrototypeOf is the standard replacement for the deprecated __proto__ accessor.
    const prototype = Object.getPrototypeOf(initiator);
    for (const property of ObjProperties.getOwnNonenumerables(prototype)) {
      if (reserved.has(property)) {
        continue;
      }

      // Regular function (not arrow) so `this` is the chain the method is called on.
      this[property] = function (...args) {
        this.chain.push(() => this.ref[property](...args));
        return this;
      };
    }
  }

  /**
   * Executes the queued calls in order and empties the queue.
   *
   * @returns {Promise<*>} Whatever `pWaterfall` returns for the queued thunks
   *   (with p-waterfall, a promise for the last call's result).
   */
  run() {
    // Detach the queue first so the chain can be reused while this batch is running.
    const pending = this.chain;
    this.chain = [];
    return pWaterfall(pending);
  }
}

module.exports = Chain;
// app.js
const puppeteer = require('puppeteer');
const Chain = require("pup-chain");

// Demo: visit each URL through a Chain and print the resolved URL and page title.
(async () => {
  const browser = await puppeteer.launch();

  // try/finally so the browser process is shut down even when a navigation fails.
  try {
    const page = await browser.newPage();
    const urls = ["https://example.com", "https://example.org"];

    // `const` keeps the loop variable local; the undeclared form throws in strict mode / ES modules.
    for (const url of urls) {
      // The constructor is synchronous, so there is nothing to await here.
      const mainChain = new Chain(page);

      // run() yields the result of the last queued call, i.e. page.url().
      const newurl = await mainChain
        .goto(url)
        .url()
        .run();

      const title = await mainChain
        .title()
        .run();

      console.log(`${url} > URL: "${newurl}", Title: "${title}"`);
    }
  } finally {
    await browser.close();
  }
})();
tiancheng91 commented 6 years ago

ubuntu 18.04 部分环境异常

chrome --headless --no-sandbox --disable-setuid-sandbox --disable-gpu --disable-dev-shm-usage

抛出 Lost UI shared context, 无法加载页面

https://github.com/GoogleChrome/puppeteer/issues/1828

tiancheng91 commented 6 years ago

相关项目

tiancheng91 commented 6 years ago

需求: DevTools proxy

tiancheng91 commented 6 years ago

puppeteer 过滤脚本

// Block requests to googleadservices.com and let every other request through.
// NOTE: Puppeteer only accepts abort()/continue() once interception has been
// enabled with `await page.setRequestInterception(true)`.
page.on('request', async (request) => {
  if (request.url().includes('googleadservices.com')) {
    await request.abort();
    // A request may be resolved only once; returning here prevents the
    // fall-through to continue() on an already-aborted request.
    return;
  }
  await request.continue();
});
tiancheng91 commented 6 years ago

多会话处理: https://github.com/GoogleChrome/puppeteer/issues/85 https://github.com/GoogleChrome/puppeteer/issues/645

const puppeteer = require('puppeteer');
const Page = require('puppeteer/lib/Page');

/**
 * Creates a new browser context, opens an `about:blank` target in it and
 * wraps that target in a Page.
 *
 * Relies on underscore-prefixed members of `browser` and on `Page.create`,
 * exactly as the surrounding snippet does.
 *
 * @param {object} browser - Browser object exposing `_connection`,
 *   `_ignoreHTTPSErrors` and `_screenshotTaskQueue`.
 * @returns {Promise<object>} The created page, tagged with `browserContextId`.
 */
async function newPageWithNewContext(browser) {
  const connection = browser._connection;

  const context = await connection.send('Target.createBrowserContext');
  const target = await connection.send('Target.createTarget', {
    url: 'about:blank',
    browserContextId: context.browserContextId,
  });
  const session = await connection.createSession(target.targetId);

  const created = await Page.create(
    session,
    browser._ignoreHTTPSErrors,
    browser._screenshotTaskQueue,
  );
  // Remember which context owns the page so it can be disposed later.
  created.browserContextId = context.browserContextId;
  return created;
}

/**
 * Closes a page and, when the page carries a `browserContextId`, first
 * disposes that browser context.
 *
 * @param {object} browser - Browser object exposing `_connection.send`.
 * @param {object} page - Page to close; `browserContextId` is optional.
 * @returns {Promise<void>}
 */
async function closePage(browser, page) {
  const { browserContextId } = page;
  // Same condition as a loose `!= undefined`: both undefined and null mean "no context".
  const hasOwnContext = browserContextId !== undefined && browserContextId !== null;

  if (hasOwnContext) {
    await browser._connection.send('Target.disposeBrowserContext', { browserContextId });
  }
  await page.close();
}

// Demo: open a page in its own browser context, load a site and print its cookies.
(async () => {
  const browser = await puppeteer.launch();

  // Nested try/finally: the page (and its context) and the browser are
  // released even when navigation or the cookie lookup rejects.
  try {
    const page = await newPageWithNewContext(browser);
    try {
      await page.goto('https://example.com');
      console.log(await page.cookies());
    } finally {
      await closePage(browser, page);
    }
  } finally {
    await browser.close();
  }
})();