zfcsoftware / puppeteer-real-browser

This package is designed to bypass puppeteer's bot-detecting captchas such as Cloudflare. It acts like a real browser and can be managed with puppeteer.
https://www.npmjs.com/package/puppeteer-real-browser
MIT License
553 stars 75 forks source link

it is not working exactly like a real browser because the website is detecting this #14

Closed insinfo closed 1 month ago

insinfo commented 6 months ago

It is not working exactly like a real browser: the website I am using for testing detects that I am browsing through Puppeteer. The site opens a video player if you visit it manually in Google Chrome, but when Puppeteer is used it shows a message and does not display the player.

I'm using the latest version of node


// Page opened by the reproduction script.
const url = 'https://brbeast.com/video/f79921bbae40a577928b76d2fc3edc2a';

/**
 * Pause helper: returns a promise that resolves (with no value) once `ms`
 * milliseconds have elapsed.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
    new Promise((resolve) => {
        setTimeout(resolve, ms);
    });

/**
 * Launches a browser through puppeteer-real-browser, navigates to `url`,
 * waits five seconds and saves a full-page screenshot to testresult.png.
 *
 * The browser is not closed afterwards: the `browser.close()` call below is
 * disabled, so the window stays open after the run.
 *
 * @returns {Promise<void>} Settles once the screenshot has been written.
 */
const start = async () => {
    // `const` rather than `var`: the binding is never reassigned and should
    // stay block-scoped.
    const { puppeteerRealBrowser } = await import('puppeteer-real-browser');
    const { page, browser } = await puppeteerRealBrowser({
        headless: false, // (optional) The default is false. If true is sent, the browser opens incognito. If false is sent, the browser opens visible.
        action: 'default', // (optional) If default, it connects with puppeteer by opening the browser and returns you the page and browser. if socket is sent, it returns you the browser url to connect to.
        executablePath: 'default', // (optional) If you want to use a different browser instead of Chromium, you can pass the browser path with this variable.
        // (optional) If you are using a proxy, you can send it as follows.
        // proxy:{
        //     host:'<proxy-host>',
        //     port:'<proxy-port>',
        //     username:'<proxy-username>',
        //     password:'<proxy-password>'
        // }
    });
    console.log('Running tests..');
    // You should use it if you want the fingerprint values of the page to be changed.
    // puppeteerAfp(page);

    await page.goto(url);
    // Fixed pause so the page has time to render before the capture.
    await sleep(5000);
    await page.screenshot({ path: 'testresult.png', fullPage: true });
    // await browser.close()
    console.log(`All done, check the screenshot. ✨`);
};

// Handle rejections explicitly instead of leaving the promise floating, so a
// failed launch or navigation is reported and reflected in the exit code.
start().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

testresult

insinfo commented 6 months ago

It seems like the problem is that it can't fool the devtools detectors https://cdn.jsdelivr.net/npm/devtools-detector https://github.com/AEPKILL/devtools-detector

zfcsoftware commented 6 months ago

As you said, the problem is with devtools

// Immediately-invoked function quoted from the page. The `||` chain
// short-circuits: if the user agent matches any of the listed mobile patterns,
// nothing else runs. Otherwise a devtoolsDetector listener is registered that
// sets document.location.href to the no_video.html template whenever its first
// argument is truthy (presumably the "devtools open" flag; confirm against
// the devtools-detector docs), and devtoolsDetector.launch() is then called.
!function() { navigator.userAgent.match(/Android/i) || navigator.userAgent.match(/webOS/i) || navigator.userAgent.match(/iPhone/i) || navigator.userAgent.match(/iPad/i) || navigator.userAgent.match(/iPod/i) || navigator.userAgent.match(/BlackBerry/i) || navigator.userAgent.match(/Windows Phone/i) || (devtoolsDetector.addListener(function(t, e) { t && (document.location.href = "../player/includes/template/no_video.html") }), devtoolsDetector.launch())
}();

This code on the page redirects to another page. To get around it, you need to delete this code, defeat the devtools detection, or remove or replace the `document.location.href` assignment.

insinfo commented 6 months ago

I managed to make it work in Dart like this:

import 'package:puppeteer/puppeteer.dart';

// URLs whose requests are aborted by the request-interception handler in
// main(). Matching there uses List.contains, i.e. exact string equality with
// the request URL. Only the devtools-detector CDN URL is active; the other
// entries are commented out.
final blockUrls = [
  //'https://pl20623807.toprevenuegate.com/20/41/ad/2041ad026c42ce264b91586de1c33c6e.js',
  'https://cdn.jsdelivr.net/npm/devtools-detector',
  //'https://code.jquery.com/jquery-1.12.4.min.js',
  //'https://brbeast.com/player/assets/scripts.php?v=6'
];

/// Opens the video page in a visible browser while neutralising the page's
/// devtools detection, then takes a screenshot.
///
/// Steps:
///  1. Launch a non-headless browser with a 1280x720 viewport.
///  2. Register a script with `evaluateOnNewDocument` that patches
///     `Date.prototype.getTime` and the `Worker.prototype.onmessage` setter so
///     that callers whose stack trace contains "devtool" see a time of 0.
///  3. Abort every request whose URL is listed in [blockUrls]; let all other
///     requests continue.
///  4. Navigate to the page, wait four seconds and capture a screenshot.
///
/// The browser is left running: the close call at the end is disabled.
void main() async {
  final url = 'https://brbeast.com/video/f79921bbae40a577928b76d2fc3edc2a';
  final browser = await puppeteer.launch(
    timeout: Duration(days: 4),
    headless: false,
    defaultViewport: DeviceViewport(width: 1280, height: 720),
  );
  // Set custom user agent
  final customUA =
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36';

  // JavaScript injected into the page. It is runtime text and is kept exactly
  // as written. NOTE(review): `newFn` is assigned without a declaration, so it
  // becomes an implicit global in the page; this works only in sloppy mode.
  const script = '''
() => {

  window.onbeforeunload = function() { return "sorry, Your some work will be lost - really sorry."; };

  function isDevToolsScript() {
      var stack = new Error().stack;
      return stack.includes('devtool');
  }

  Date.prototype.originalGetTime = Date.prototype.getTime;
  Date.prototype.getTime = function () {
      if (!isDevToolsScript()) {
          return this.originalGetTime();
      }
      return 0;
  }

  const originalOnMessageSetter = Object.getOwnPropertyDescriptor(Worker.prototype, 'onmessage').set;
  Object.defineProperty(Worker.prototype, 'onmessage', {
      set: function (fn) {
          if (!isDevToolsScript()) {
              originalOnMessageSetter.call(this, fn);
              return;
          }
          newFn = (ev) => {
              ev.data.time = 0;
              fn(ev);
          }
          originalOnMessageSetter.call(this, newFn);
      }
  });
}''';
  final page = await browser.newPage();
  await page.evaluateOnNewDocument(script);
  await page.setUserAgent(customUA);

  // Setting page view  { 'width': 1280, 'height': 720 }
  // await page.setViewport(DeviceViewport(width: 1280, height: 720));
  // await page.setExtraHTTPHeaders({
  //      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
  //      'upgrade-insecure-requests': '1',
  //      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
  //      'accept-encoding': 'gzip, deflate, br',
  //      'accept-language': 'en-US,en;q=0.9,en;q=0.8'
  //  });
  await page.setRequestInterception(true);
  page.onRequest.listen((req) async {
    // Exact-match block list: abort listed URLs, pass everything else through.
    if (blockUrls.contains(req.url)) {
      req.abort();
    } else {
      req.continueRequest();
    }

    print('url:  ${req.url} | ${req.response?.headers}');
  });
  // Await the navigation so a failure surfaces here instead of as an
  // unhandled asynchronous error, and so the delay below starts only after
  // the navigation has completed.
  await page.goto(url);
  await Future.delayed(Duration(seconds: 4));
  await page.screenshot();
  // await myPage.pdf();
  // await page.evaluate<String>('() => document.title');

  // Gracefully close the browser's process
  //await browser.close();
}
zfcsoftware commented 3 months ago

@insinfo When you start the browser with the code below, it won't get caught so you can use it without any problem. The library will be updated to create a browser as in the code. https://github.com/zfcsoftware/cf-clearance-scraper/blob/main/module/browser.js

You can also check the answer below to use in such situations. https://github.com/zfcsoftware/puppeteer-real-browser/issues/49