intoli / user-agents

A JavaScript library for generating random user agents with data that's updated daily.
Other
983 stars 51 forks source link

How to properly setup in puppeteer? #8

Closed terion-name closed 5 years ago

terion-name commented 5 years ago

How do I properly imitate a browser in Puppeteer? Currently I use this:

// Posted snippet: launches a browser through puppeteer-extra, opens a page,
// applies a viewport and user agent taken from `ua`, overrides a set of
// screen / window / navigator properties, then navigates to the site.
// NOTE(review): `ua`, `windowFrame` and `proxy` are used below but are not
// defined anywhere in this snippet — presumably declared in code not shown.
const puppeteer = require('puppeteer-extra')
const pluginStealth = require("puppeteer-extra-plugin-stealth");
// NOTE(review): this async arrow function is opened here but is never closed
// or invoked in the snippet as posted (the final `})` closes the `.then(...)`
// callback), so the snippet is not syntactically complete on its own.
(async () => {
  console.log(proxy.url);
  puppeteer.launch({
    headless: true,
    ignoreHTTPSErrors: true,
    args: [
      // Window height is the viewport height plus `windowFrame`.
      `--window-size=${ua.data.viewportWidth},${ua.data.viewportHeight + windowFrame}`,
    ]
  }).then(async browser => {
// The stealth plugin is registered here, inside the launch callback, i.e.
// after `puppeteer.launch(...)` has already resolved.
// NOTE(review): puppeteer-extra's documented usage calls `puppeteer.use(...)`
// before `launch()` — confirm whether registering it this late has any effect
// on the already-launched browser.
puppeteer.use(pluginStealth({
  enabledEvasions: new Set([
    'chrome.runtime',
    'console.debug',
    'navigator.languages',
    'navigator.permissions',
    'navigator.webdriver',
    'navigator.plugins',
    'window.outerdimensions',
    'webgl.vendor',
    'user-agent'
  ])
}));

    const page = await browser.newPage();
    await page.setExtraHTTPHeaders({
      'Accept-Language': 'en-US,en;q=0.9',
      'Accept-Encoding': 'gzip, deflate, br'
    });

    await page.setViewport({
      width: ua.data.viewportWidth,
      height: ua.data.viewportHeight
    });
    await page.setUserAgent(ua.toString());
    // Runs in each new document before page scripts; `uad` and `windowFrame`
    // receive the `ua.data` and `windowFrame` values passed as trailing
    // arguments to evaluateOnNewDocument below.
    await page.evaluateOnNewDocument((uad, windowFrame) => {
      Object.defineProperty(screen, 'width', {get: () => uad.screenWidth});
      Object.defineProperty(screen, 'height', {get: () => uad.screenHeight});
      Object.defineProperty(window, 'outerWidth', {get: () => uad.viewportWidth});
      Object.defineProperty(window, 'outerHeight', {get: () => uad.viewportHeight + windowFrame});
      Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
      // NOTE(review): this getter returns an array, whereas `navigator.language`
      // is a single string in browsers (`navigator.languages` is the array).
      // This is very likely a bug.
      Object.defineProperty(navigator, 'language', {get: () => ['en-US']});
      Object.defineProperty(navigator, 'connection', {get: () => uad.connection});
      Object.defineProperty(navigator, 'cpuClass', {get: () => uad.cpuClass});
      Object.defineProperty(navigator, 'oscpu', {get: () => uad.oscpu});
      Object.defineProperty(navigator, 'platform', {get: () => uad.platform});
      Object.defineProperty(navigator.plugins, 'length', {get: () => uad.pluginsLength});
      Object.defineProperty(navigator, 'vendor', {get: () => uad.vendor});

      // Hide `window.chrome` when the generated user agent is not Chrome.
      if (uad.appName !== 'Chrome') {
        Object.defineProperty(window, 'chrome', {get: () => undefined});
      }
    }, ua.data, windowFrame);

    // NOTE(review): the options object sits outside the `goto(...)` call
    // parentheses, so it is only the right-hand side of a comma expression and
    // `waitUntil` is never passed to `page.goto`.
    await page.goto('https://my-site.com/'), {waitUntil: 'networkidle0'};
})

I test it by watching the Google Analytics realtime report. If I remove setUserAgent and evaluateOnNewDocument (while still using the raw pluginStealth), my hit shows up in the GA realtime report. If I enable all of this, GA doesn't report the hit — it seems to be detected as a bot and ignored.

What am I doing wrong?

terion-name commented 5 years ago

Running with headless: false and watching network activity: without my evaluateOnNewDocument everything is fine; with it, GA's collect requests are simply not sent.

terion-name commented 5 years ago

Oh, it was my error:

Object.defineProperty(navigator, 'language', {get: () => ['en-US']});

It shouldn't be an array. I fixed that and everything worked.