ZFC-Digital / puppeteer-real-browser

This package is designed to bypass puppeteer's bot-detecting captchas such as Cloudflare. It acts like a real browser and can be managed with puppeteer.
https://www.npmjs.com/package/puppeteer-real-browser
MIT License
756 stars 91 forks source link

Support for adblock and other puppeteer-extra plugins #32

Closed sharpanimes closed 2 months ago

sharpanimes commented 7 months ago

Hey, I was wondering if I could use the puppeteer-extra adblock feature, because I couldn't find a way to do this currently.

mdervisaygan commented 7 months ago

Hello, you will need to modify the library files for this. First you need to set the tf variable to false when starting the browser and then set it to true with setTarget. Then you can add the plugins you want to the index.js file of the library. https://github.com/zfcsoftware/puppeteer-real-browser/blob/main/src/index.js


import { connect } from 'puppeteer-real-browser'

// Example: start the browser with `tf` set to false, then switch it back on
// through the `setTarget` helper that `connect` resolves with.
connect({
    tf: false, // If a feature you want to use at startup is not working, you can initialize the tf variable false and update it later.
    turnstile: true
})
.then(async response => {
        // `connect` resolves with (at least) the page, the browser and setTarget.
        const { page, browser, setTarget } = response
     // Flip the target status back to true now that the browser is running.
     setTarget({ status: true })
        // NOTE(review): the result of page.goto is not awaited here, so a
        // navigation failure would not be observed by this callback.
        page.goto('https://nopecha.com/demo/cloudflare', {
            waitUntil: 'domcontentloaded'
        })

})
sharpanimes commented 7 months ago
import { startSession, closeSession } from './module/chromium.js'
import puppeteer from 'puppeteer-extra';
import { notice, slugify } from './module/general.js'
import { autoSolve, setSolveStatus } from './module/turnstile.js'
import { fp } from './module/afp.js';
import { puppeteerRealBrowser } from './module/old.js'
import AdblockerPlugin from "puppeteer-extra-plugin-adblocker";

export { puppeteerRealBrowser };

// Module-level switch read by targetFilter and written by setTarget / connect.
// Starts out as `true`.
let global_target_status = true

// Options handed to the adblocker plugin factory.
const adblockerOptions = {
    blockTrackers: true, // default: false
    useCache: true,
}

// Single plugin instance shared by the whole module.
const adblocker = AdblockerPlugin(adblockerOptions)

/**
 * Decides whether a browser target should be accepted.
 *
 * - When `global_target_status` is `false`, every target is accepted.
 * - Otherwise a target is accepted when `target.url()` is truthy, or when its
 *   URL (as a string) contains one of the `skipTarget` entries.
 *
 * The original test was written as `indexOf(String(item) > -1)`, which compares
 * first and searches for the text "true"/"false"; the resulting -1 is truthy,
 * so any non-empty `skipTarget` list made every target pass. The substring
 * check below does what the expression was evidently meant to do.
 *
 * @param {Object} options
 * @param {{ url: Function }} options.target - object exposing a `url()` method.
 * @param {Array} [options.skipTarget=[]] - values matched as substrings of the URL.
 * @returns {boolean} `true` when the target is accepted.
 */
function targetFilter({ target, skipTarget = [] }) {
    if (global_target_status === false) {
        return true
    }

    const rawUrl = target.url()
    if (rawUrl) {
        return true
    }

    // Falsy URL: accept only when a skipTarget entry really occurs in it.
    const url = String(rawUrl)
    return skipTarget.some(item => url.includes(String(item)))
}

/**
 * Passes a page to the `fp` helper and hands the same page back.
 *
 * @param {*} page - the page object to pass to `fp`.
 * @returns {Promise<*>} resolves with the very page that was passed in;
 *   rejects if `fp` throws synchronously.
 */
async function handleNewPage(page) {
    // The return value of fp is neither used nor awaited.
    fp(page);

    return page;
}

/**
 * Updates the module-level `global_target_status` switch.
 *
 * @param {Object} options
 * @param {*} [options.status=true] - value stored in the switch; defaults to
 *   `true` when the property is missing or undefined.
 */
const setTarget = (options) => {
    const { status = true } = options;
    global_target_status = status;
};

/**
 * Starts a browser session via `startSession`, connects puppeteer-extra (with
 * the adblocker plugin registered) to it and prepares the first page.
 *
 * This is a plain async function: any failure while starting or connecting
 * rejects the returned promise. (The earlier version ran the same body inside
 * `new Promise(async ...)`, where a throw left the promise pending forever.)
 *
 * @param {Object} [options]
 * @param {Array} [options.args=[]] - forwarded to `startSession`.
 * @param {*} [options.headless='auto'] - forwarded to `startSession`.
 * @param {Object} [options.customConfig={}] - forwarded to `startSession`.
 * @param {Object} [options.proxy={}] - forwarded to `startSession`; when it has a
 *   non-empty `username`, the first page is authenticated with username/password.
 * @param {Array} [options.skipTarget=[]] - forwarded to `targetFilter`.
 * @param {boolean} [options.fingerprint=true] - when `true`, pages go through `handleNewPage`.
 * @param {boolean} [options.turnstile=false] - when `true`, `autoSolve` is started for pages.
 * @param {Object} [options.connectOption={}] - spread into the `puppeteer.connect` options.
 * @param {boolean} [options.tf=true] - initial value of `global_target_status`.
 * @returns {Promise<{browser: *, page: *, xvfbsession: *, cdpSession: *, chrome: *, setTarget: Function}>}
 */
export const connect = async ({ args = [], headless = 'auto', customConfig = {}, proxy = {}, skipTarget = [], fingerprint = true, turnstile = false, connectOption = {}, tf = true } = {}) => {

    global_target_status = tf

    const { chromeSession, cdpSession, chrome, xvfbsession } = await startSession({
        args: args,
        headless: headless,
        customConfig: customConfig,
        proxy: proxy
    })

    puppeteer.use(adblocker);   //added adblocker here

    console.log("Changes working");
    const browser = await puppeteer.connect({
        targetFilter: (target) => targetFilter({ target: target, skipTarget: skipTarget }),
        browserWSEndpoint: chromeSession.browserWSEndpoint,
        ...connectOption
    });

    // Work with the first page reported by the browser.
    const pages = await browser.pages()
    const page = pages[0]

    if (proxy && proxy.username && proxy.username.length > 0) {
        await page.authenticate({ username: proxy.username, password: proxy.password });
    }

    if (fingerprint === true) {
        // Awaited so that a failure rejects connect() instead of floating.
        await handleNewPage(page);
    }
    if (turnstile === true) {
        setSolveStatus({ status: true })
        autoSolve({ page: page, browser: browser })
    }

    await page.setUserAgent(chromeSession.agent);

    await page.setViewport({
        width: 1920,
        height: 1080
    });

    // Stop solving and release the session resources once the browser goes away.
    browser.on('disconnected', async () => {
        notice({
            message: 'Browser Disconnected',
            type: 'info'
        })
        setSolveStatus({ status: false })
        await closeSession({
            xvfbsession: xvfbsession,
            cdpSession: cdpSession,
            chrome: chrome
        })
    });

    // Give every newly created target the same user agent / viewport setup.
    browser.on('targetcreated', async target => {
        const newPage = await target.page();

        // Best-effort: these calls are allowed to fail (for example when
        // target.page() yields no page object), so errors are ignored on purpose.
        try {
            await newPage.setUserAgent(chromeSession.agent);
        } catch (err) {
            // intentionally ignored
        }

        try {
            await newPage.setViewport({
                width: 1920,
                height: 1080
            });
        } catch (err) {
            // intentionally ignored
        }

        if (newPage && fingerprint === true) {
            try {
                // Awaited so the catch below actually sees a rejection.
                await handleNewPage(newPage);
            } catch (err) {
                // intentionally ignored
            }
        }

        if (turnstile === true) {
            autoSolve({ page: newPage })
        }
    });

    return {
        browser: browser,
        page: page,
        xvfbsession: xvfbsession,
        cdpSession: cdpSession,
        chrome: chrome,
        setTarget: setTarget
    }
}

Well, I edited it like this, but the ads are still there (see the "added adblocker here" part). It would be helpful if you could guide me through this.

eindrawan commented 7 months ago

I guess it's because the page is already created, so the plugin is not registered properly on that initial page. You can try adding adblocker.onPageCreated(page) after page = page[0].

you can also check my fork: https://github.com/eindrawan/puppeteer-real-browser/blob/main/src/index.js

@zfcsoftware may I create a PR for this?

mdervisaygan commented 7 months ago

I guess its because the page already created, thus the plugin is not registered properly on that initial page you can try add adblocker.onPageCreated(page) after var page = page[0]

you can also check my fork: https://github.com/eindrawan/puppeteer-real-browser/blob/main/src/index.js

@zfcsoftware may I create a PR for this?

Hi, thanks for your support. I think some plugins may get errors when used in this way. I will update Puppeteer to allow the user to submit.

sharpanimes commented 6 months ago

Hey, thanks for the replies. I made it work in my own way: I added the following to the index page.

export const connect = ({ args = [], headless = 'auto', customConfig = {}, proxy = {}, skipTarget = [], fingerprint = true, turnstile = false, connectOption = {}, tf = true }) => {
    return new Promise(async (resolve, reject) => {

        global_target_status = tf

        const adblocker = AdblockerPlugin({
            blockTrackers: true, // default: false
            useCache: false,
        })
        const blocker = await adblocker.getBlocker()

Then I returned the blocker and used the plugin's manual blocking like this after creating each page:

page = await browser.newPage();
setTarget({ status: true });
blocker.enableBlockingInPage(page);
alextran317 commented 5 months ago

@sharpanimes Could you please display the full code snippet that you edited?

sharpanimes commented 4 months ago

@sharpanimes Could you please display the full code snippet that you edited?

Hey, sorry if my instructions were unclear. Step 1: The first thing you need is to install the https://www.npmjs.com/package/puppeteer-extra-plugin-adblocker plugin. Step 2: After that you need to modify puppeteer-real-browser's index page. I will link the modified index.js page: https://pastebin.com/CKvFwAjU Step 3: Now, in your main project file where you are using puppeteer, do the following:

const response = await connect({
        headless: 'auto',
        args: [],
        customConfig: {},
        skipTarget: [],
        fingerprint: true,
        turnstile: true,

        tf: false
    });

    const { browser, page, setTarget, blocker } = response;
Now whenever you are creating a page do it like below 
 page = await browser.newPage();
 setTarget({ status: true });
            blocker.enableBlockingInPage(page);
        Now your adblocker should be working
        Be sure to import adblocker in you project also to be on the safer side
alextran317 commented 4 months ago

@sharpanimes I followed your instructions and encountered an error: Error [ERR_MODULE_NOT_FOUND]: Cannot find module 'D:\laragon\www\node_modules\puppeteer-real-browser\src\module\afp.js' imported from D:\laragon\www\node_modules\puppeteer-real-browser\src\index.js

sharpanimes commented 4 months ago

@sharpanimes I followed your instructions and encountered an error: Error [ERR_MODULE_NOT_FOUND]: Cannot find module 'D:\laragon\www\node_modules\puppeteer-real-browser\src\module\afp.js' imported from D:\laragon\www\node_modules\puppeteer-real-browser\src\index.js

I think there are some changes in the latest version. I am using version ^1.2.11 . So try installing this version of puppeteer-real-browser

alextran317 commented 4 months ago

@sharpanimes I followed your instructions and encountered an error: Error [ERR_MODULE_NOT_FOUND]: Cannot find module 'D:\laragon\www\node_modules\puppeteer-real-browser\src\module\afp.js' imported from D:\laragon\www\node_modules\puppeteer-real-browser\src\index.js

I think there are some changes in the latest version. I am using version ^1.2.11 . So try installing this version of puppeteer-real-browser

Thank bro

alextran317 commented 3 months ago

@sharpanimes I followed your instructions and encountered an error: Error [ERR_MODULE_NOT_FOUND]: Cannot find module 'D:\laragon\www\node_modules\puppeteer-real-browser\src\module\afp.js' imported from D:\laragon\www\node_modules\puppeteer-real-browser\src\index.js

I think there are some changes in the latest version. I am using version ^1.2.11 . So try installing this version of puppeteer-real-browser

@zfcsoftware @sharpanimes According to @sharpanimes, it works well on version 1.2.11 but doesn't work on 1.2.20. Please fix the issue for the puppeteer-real-browser version: '1.2.20'. I tried but couldn't resolve it with the new version.

mdervisaygan commented 3 months ago

I will make this feature request after major updates have been added.

alextran317 commented 3 months ago

I will make this feature request after major updates have been added.

Thank you very much.

BookZ159 commented 3 months ago

@zfcsoftware I see there are many requests for an ad-blocking feature when opening the browser. Please prioritize implementing it! Thank you very much for this useful and wonderful source code.

mdervisaygan commented 3 months ago

@zfcsoftware I see there are many requests for an ad-blocking feature when opening the browser. Please prioritize implementing it! Thank you very much for this useful and wonderful source code.

I will add support in a few days.

BookZ159 commented 3 months ago

thank bro

XLordalX commented 3 months ago

Thank you. This is a much needed feature. Might be best to allow overriding the whole launch call for more flexibility.

alextran317 commented 3 months ago

@zfcsoftware I see there are many requests for an ad-blocking feature when opening the browser. Please prioritize implementing it! Thank you very much for this useful and wonderful source code.

I will add support in a few days.

Please update . Thanks

mdervisaygan commented 2 months ago

https://github.com/zfcsoftware/puppeteer-real-browser?tab=readme-ov-file#how-to-install-puppeteer-extra-plugins Support added.

meotimdihia commented 2 months ago

@zfcsoftware it looks like the package doesn't support adblock yet.

mdervisaygan commented 2 months ago

@zfcsoftware it looks like the package doesn't support adblock yet.

It supports it. I suggest you try again.

https://github.com/user-attachments/assets/2cfc7943-6ea0-46c1-9f08-203cf6e22e9c

const test = require('node:test');
const assert = require('node:assert');
const { connect } = require('puppeteer-real-browser');

// Options passed to connect() by the test below; `plugins` starts empty and is
// filled in by the test itself.
const realBrowserOption = {
    args: ["--start-maximized"],
    turnstile: true,
    headless: false,
    disableXvfb: true,
    customConfig: {},
    connectOption: {
        defaultViewport: null
    },
    plugins: []
}

// Connects with the adblocker plugin supplied through `plugins`, then opens a
// YouTube search results page.
test('Puppeteer Extra Plugin', async () => {
    // Overwrites the shared options object with a single adblocker plugin instance.
    realBrowserOption.plugins = [
        require('puppeteer-extra-plugin-adblocker')({
            interceptResolutionPriority: 0
        })
    ]
    const { page, browser } = await connect(realBrowserOption)
    // The test contains no assertions; it passes as long as nothing above throws.
    await page.goto("https://www.youtube.com/results?search_query=what+is+seo", { waitUntil: "domcontentloaded" })

})
meotimdihia commented 2 months ago

@zfcsoftware Thanks, I think the package would support adblock directly :)

meotimdihia commented 2 months ago

@zfcsoftware plugins/Adblock doesn't work when the proxy is enabled. I tested with ESM:

import test from 'node:test';
import assert from 'node:assert';
import { connect } from '../../lib/esm/index.mjs';
// NOTE(review): this options object is declared but the test below passes its
// own inline options to connect(), so it is not used in this snippet.
const realBrowserOption = {
    args: ["--start-maximized"],
    turnstile: true,
    headless: false,
    disableXvfb: true,
    customConfig: {},
    connectOption: {
        defaultViewport: null
    },
    plugins: []
}

// Reproduction: connect with both a proxy and the adblocker plugin, then open a
// YouTube search results page.
test('Puppeteer Extra Plugin', async () => {
  // Load the adblocker plugin factory through a dynamic import (ESM).
  const AdblockerPlugin = await (
    await import("puppeteer-extra-plugin-adblocker")
  ).default
  const { browser, page } = await connect({
    headless: false,
    // Placeholder proxy credentials; the reporter states the plugin works when
    // this option is removed.
    proxy: {
      host: "xxxx",
      port: "xxx",
      username: "xxx",
      password: "xxx"
    }, // works fine if we remove this option.
    args: [],
    skipTarget: [],
    fingerprint: false,
    turnstile: true,
    connectOption: {},
    fpconfig: {},
    plugins: [
      AdblockerPlugin({
        blockTrackers: true,
        interceptResolutionPriority: 0,
        blockTrackersAndAnnoyances: true
      })
    ]
  })
// The test contains no assertions; it passes as long as nothing above throws.
await page.goto("https://www.youtube.com/results?search_query=what+is+seo", { waitUntil: "domcontentloaded" })

})
alextran317 commented 2 months ago

@zfcsoftware please fix it

mdervisaygan commented 2 months ago

This issue has been fixed. I have attached a video of the test and sample code below. Please test again with the latest version.

https://github.com/user-attachments/assets/218ec8da-d4a0-414a-9a14-d1290c320b60

const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
const { connect } = require('puppeteer-real-browser')
const test = require('node:test')

// Maintainer's sample: connect with a proxy and the adblocker plugin, then open
// a YouTube search results page.
test('Puppeteer Extra Plugin', async () => {
    const { page, browser } = await connect({
        args: ["--start-maximized"],
        disableXvfb: true,
        connectOption: {
            defaultViewport: null
        },
        // Proxy settings are read from a local proxy.json file next to the test.
        proxy: require('./proxy.json'),
        plugins: [
            // The plugin's interception priority is set to the constant exported by puppeteer.
            require('puppeteer-extra-plugin-adblocker')({ interceptResolutionPriority: DEFAULT_INTERCEPT_RESOLUTION_PRIORITY })
        ]
    })
    // The test contains no assertions; it passes as long as nothing above throws.
    await page.goto("https://www.youtube.com/results?search_query=what+is+seo", { waitUntil: "domcontentloaded" })

})
Roichue commented 2 months ago

I cannot solve captchas when I use the page.solveRecaptchas() function. There is no response.

Error: Error: TimeoutError: Waiting failed: 10000ms exceeded (Recaptcha-plugin)


const { page, browser } = await connect({
        tf: false, 
        turnstile: true,
        args: ["--start-maximized"],
        headless: 'auto', 
        customConfig: {},
        skipTarget: [],
        fingerprint: true,
        connectOption: {},
        plugins: [
          require('puppeteer-extra-plugin-click-and-wait')(),
          require('puppeteer-extra-plugin-recaptcha')({
            provider: {
              id: '2captcha',
              token: apiKey,
            },
            visualFeedback: true,
            throwOnError: true
          }),
        ]
      });
BrianWalczak commented 4 weeks ago

Same here, having problems with reCAPTCHA. Bumping this because it should be fixed :)