rebrowser / rebrowser-patches

Collection of patches for puppeteer and playwright to avoid automation detection and leaks. Helps to avoid Cloudflare and DataDome CAPTCHA pages. Easy to patch/unpatch, can be enabled/disabled on demand.
https://rebrowser.net
284 stars 25 forks source link

Datadome detects typing from rebrowser #49

Open Shijomon opened 1 day ago

Shijomon commented 1 day ago

The rebrowser patches are great, and I am sure they now differ a lot from any stock Puppeteer version. What I found is that when we open a browser and type something manually it works perfectly, but when we try to automate the typing we get blocked. Clicks and submits all work perfectly; only typing or pressing keys from rebrowser is identified. One more thing: the iframe click issue I mentioned earlier is still occurring in some cases. I am sharing my code here for reference, with images. I have tried all the different typing modules, but no joy.

Audio to text

import requests import speech_recognition as sr import sys import re

# Lookup table from a lower-case word to a single digit character.
# Besides the regular number names it lists sound-alike words ("won", "ate",
# "heaven", ...) so that a mis-heard transcription still maps to a digit.
# convert_audio_to_text() looks each transcribed word up in this table.
number_mapping = { "zero": "0", "one": "1", "won": "1",
"two": "2", "too": "2",
"to": "2",
"three": "3", "free": "3",
"four": "4", "for": "4",
"five": "5", "hive": "5",
"six": "6", "seven": "7", "heaven": "7",
"eight": "8", "ate": "8",
"nine": "9", "mine": "9", "sex":"6"
}

# Fetches `url` with requests.get using a fixed set of request headers, writes
# the response body to 'captcha_audio.wav' in the current working directory and
# returns that file name.
# NOTE(review): the HTTP status is never checked, so an error response body is
# written to the .wav file unchanged.
# NOTE(review): the file object returned by open() is not closed explicitly.
# NOTE(review): 'accept': '/' looks like '*/*' with the asterisks lost when the
# snippet was rendered as markdown -- confirm against the author's real file.
# NOTE(review): indentation of this paste is lost; the lines below are assumed
# to be the function body.
def download_audio(url): headers = { 'authority': 'dd.prod.captcha-delivery.com', 'accept': '/', 'accept-language': 'en-GB,en;q=0.9', 'cache-control': 'no-cache', 'dnt': '1', 'pragma': 'no-cache', 'range': 'bytes=0-', 'referer': 'https://geo.captcha-delivery.com/', 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Brave";v="120"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-dest': 'audio', 'sec-fetch-mode': 'no-cors', 'sec-fetch-site': 'same-site', 'sec-gpc': '1', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', }

response = requests.get(
    url,
    headers=headers,
)
audio_file='captcha_audio.wav'
open(audio_file,'wb').write(response.content)
return audio_file

# Transcribes `audio_file` with recognizer.recognize_google (language "en-US")
# and returns a string made only of digit characters: the transcription is
# split on single spaces, each word found in number_mapping (case-insensitive)
# is replaced by its digit, words that are already digits are kept, and every
# other word is dropped.
# Side effect: the raw transcription is written to "feedback.html".
# On sr.UnknownValueError or sr.RequestError a message is printed and nothing
# is returned explicitly, so the caller receives None.
# NOTE(review): indentation of this paste is lost; which statements sit inside
# the `with` block cannot be determined from this text -- confirm against the
# author's real file.
def convert_audio_to_text(audio_file): recognizer = sr.Recognizer() with sr.AudioFile(audio_file) as source:

recognizer.adjust_for_ambient_noise(source)

    audio_data = recognizer.record(source)

try:
    # Recognize the speech in the audio file
    text = recognizer.recognize_google(audio_data, language="en-US")
    open("feedback.html", "w").write(text)
    text=text.split(' ')
    main_text=[]
    for x_e in text:
        if x_e.lower() in number_mapping:
            main_text.append(number_mapping[x_e.lower()])
        else:
            if x_e.isdigit():
                main_text.append(x_e)
    main_text=''.join(main_text)

    return main_text
except sr.UnknownValueError:
    print("Speech recognition could not understand audio")
except sr.RequestError as e:
    print(f"Could not request results from Google Web Speech API; {e}")

# Script entry point. The unpacking below requires exactly one command-line
# argument (the audio URL) after the script name; any other count raises
# ValueError.
script,audio_url= sys.argv

# NOTE(review): this assignment overwrites the URL just read from sys.argv, so
# the command-line argument is effectively ignored.
audio_url='https://dd.prod.captcha-delivery.com/audio/2024-08-01/en/94295d999c064d3833bfb6e4ab825ad5.wav'

audio_file=download_audio(audio_url) numbers=convert_audio_to_text(audio_file)

# NOTE(review): convert_audio_to_text returns None on its except paths, in
# which case re.sub raises TypeError here. When it does return, the string
# already contains digits only, so both substitutions leave it unchanged.
numbers = re.sub(r'two|to', '2', numbers) numbers = re.sub(r'\D', '', numbers) print(numbers)

My rebrowser code

import { exec } from 'child_process'; import puppeteer from 'puppeteer'; import fs from 'fs/promises'; import { typeInto } from "@forad/puppeteer-humanize" import GoLogin from './gologin/src/gologin.js'; // import path from 'path'; // import { addExtra } from 'puppeteer-extra'; // import { delay } from './gologin/src/gologin-api.js'; // const puppeteer = addExtra(puppeteerCore);

// Options object passed as the third argument to typeInto() (imported from
// @forad/puppeteer-humanize) for every character typed in datadomCookie().
// NOTE(review): the meaning and units of `chance`, `min` and `max` are defined
// by that library -- confirm against its documentation.
const config = { mistakes: { chance: 10, delay: { min: 50, max: 500 } }, delays: { space: { chance: 20, min: 100, max: 200 } } }

/**
 * Converts a proxy string into the proxy descriptor handed to GoLogin.
 *
 * @param {string} proxy - "host:port", "user:pass@host:port", or an empty
 *   string when no proxy should be used.
 * @returns {{mode: string, host: string, port: (string|number), username: string, password: string}}
 *   Descriptor with mode "http" for a real proxy or mode "none" otherwise.
 */
function buildProxyConfig(proxy) {
    if (!proxy) {
        // Same placeholder values the script has always sent for "no proxy".
        return { mode: 'none', host: 'string', port: 0, username: 'string', password: 'string' };
    }

    if (!proxy.includes('@')) {
        const [host, port] = proxy.split(':');
        return { mode: 'http', host, port, username: '', password: '' };
    }

    const [credentials, endpoint] = proxy.split('@');
    const [username, password] = credentials.split(':');
    const [host, port] = endpoint.split(':');
    return { mode: 'http', host, port, username, password };
}

/** Resolves after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Script entry point: creates a temporary GoLogin profile, starts it locally,
// attaches puppeteer, loads the target page and hands it to datadomCookie().
// These handles live outside the try block so that `finally` can release them
// even when a step in the middle fails.
let GL;
let profileId;
let browser;

try {
    const url = 'https://ticketing.liverpoolfc.com/en-GB/categories/home-tickets';

    // The GoLogin API token is a credential and must not be committed to
    // source (or pasted into an issue); it is read from the environment.
    const token = process.env.GOLOGIN_TOKEN;
    if (!token) {
        throw new Error('GOLOGIN_TOKEN environment variable is not set');
    }

    GL = new GoLogin({
        token,
        extra_params: ['--max_old_space_size=4096'],
    });

    // Random six-letter profile name.
    const osSystems = ['win', 'mac', 'android'];
    const characters = 'abcdefghijklmnopqrstuvwxyz';
    let profileName = '';
    for (let i = 0; i < 6; i++) {
        profileName += characters.charAt(Math.floor(Math.random() * characters.length));
    }

    // Empty string means "no proxy"; see buildProxyConfig for accepted formats.
    const proxy = '';
    const options = {
        name: profileName,
        os: osSystems[Math.floor(Math.random() * osSystems.length)],
        proxy: buildProxyConfig(proxy),
    };
    profileId = await GL.create(options);
    console.log("profileid", profileId);

    // A second client bound to the freshly created profile is used to start it.
    GL = new GoLogin({
        token,
        profile_id: profileId,
        extra_params: ['--disable-gpu'],
    });
    const { status, wsUrl } = await GL.startLocal().catch((e) => {
        console.trace(e);
        return { status: 'failure' };
    });

    // Stop here on failure: without a websocket URL there is nothing to
    // connect to, and continuing would only fail later on `wsUrl.toString()`.
    if (status !== 'success') {
        throw new Error('Invalid status');
    }
    console.log("connected");
    await sleep(3000);

    browser = await puppeteer.connect({
        browserWSEndpoint: wsUrl.toString(),
    });
    await sleep(3000);

    const pages = await browser.pages();
    const page = pages[0];
    await page.goto(url, { waitUntil: "domcontentloaded" });
    console.log("loaded");

    await sleep(3000);
    await datadomCookie(page);
    await page.close();
} catch (error) {
    console.log(`Error ${error.message}`);
} finally {
    // Always release the browser and delete the temporary profile, including
    // after a failure, so neither is leaked. Each step is guarded separately so
    // that one failing cleanup does not prevent the other.
    if (browser) {
        try {
            await browser.close();
        } catch (error) {
            console.error(`Error closing browser: ${error.message}`);
        }
    }
    if (GL && profileId) {
        try {
            await GL.delete(profileId);
        } catch (error) {
            console.error(`Error deleting profile: ${error.message}`);
        }
    }
}

/**
 * Looks through the page's frames for one hosted in an <iframe> element and
 * waits for '#captcha-container' to appear inside it.
 *
 * Only the first <iframe>-hosted frame is probed: the function returns right
 * after it, whether or not the selector was found.
 *
 * @param {object} page - Object exposing frames(); each frame must provide
 *   frameElement() and waitForSelector().
 * @returns {Promise<object|null>} The frame when the container appeared within
 *   10 seconds; null when the wait failed or no <iframe>-hosted frame exists.
 */
async function switchToChallengeFrame(page) {
    for (const candidate of page.frames()) {
        const hostElement = await candidate.frameElement();
        if (!hostElement) {
            continue;
        }

        const tagHandle = await hostElement.getProperty('tagName');
        const hostTag = await tagHandle.jsonValue();
        if (hostTag !== 'IFRAME') {
            continue;
        }

        try {
            await candidate.waitForSelector('#captcha-container', { timeout: 10000 });
            console.log('Switched to iframe successfully');
            return candidate;
        } catch (error) {
            console.error('Could not access elements inside iframe:', error);
            return null;
        }
    }

    // Reaching this point means no frame was hosted in an <iframe> element.
    console.error('No iframe found');
    return null;
}

/**
 * Runs `python audio_solve.py <audioUrl>` and resolves with its trimmed
 * standard output.
 *
 * The URL is passed as a discrete argument via execFile, so no shell is
 * involved and characters such as `;`, `&` or spaces in `audioUrl` cannot
 * inject extra commands.
 *
 * @param {string} audioUrl - Value forwarded to the script as its only argument.
 * @returns {Promise<string>} Trimmed stdout of the script.
 * @throws {Error} Rejects when the process cannot be started or exits with an
 *   error, and also when it writes anything to stderr.
 */
async function getAudioText(audioUrl) {
    // Loaded here so this function does not depend on a file-level import.
    const { execFile } = await import('node:child_process');

    return new Promise((resolve, reject) => {
        execFile('python', ['audio_solve.py', String(audioUrl)], (error, stdout, stderr) => {
            if (error) {
                console.error(`Error executing command: ${error.message}`);
                reject(error);
                return;
            }

            if (stderr) {
                console.error(`stderr: ${stderr}`);
                reject(new Error(stderr));
                return;
            }

            resolve(stdout.trim());
        });
    });
}

/**
 * Walks through the audio challenge inside the captcha iframe of `page`:
 * finds the iframe with switchToChallengeFrame(), clicks the audio button,
 * reads `captchaAudioChallengePath` out of the iframe HTML, passes it to
 * getAudioText(), and types the returned characters one per input element.
 * Fixed delays are inserted between the steps.
 *
 * Always resolves to undefined: every `return` is commented out and errors are
 * caught and logged instead of being rethrown.
 *
 * @param {object} page - Page object; must work with switchToChallengeFrame()
 *   and provide evaluate().
 */
async function datadomCookie(page) { await new Promise(resolve => setTimeout(resolve, 4000)); const iframe = await switchToChallengeFrame(page);

await new Promise(resolve => setTimeout(resolve, 2000));
if (iframe) {
    try {

        await iframe.waitForSelector('#captcha__audio__button')
        await new Promise(resolve => setTimeout(resolve, 3000));
        await iframe.click('#captcha__audio__button');
        await new Promise(resolve => setTimeout(resolve, 3000));

        const iframePageSource = await iframe.content();
        // const iframePageSource = await iframe.evaluate(() => document.documentElement.outerHTML);
        // NOTE(review): `fs` is imported from 'fs/promises', so writeFile returns a
        // promise; it is not awaited, and a write failure is not handled here.
        fs.writeFile("save1.html", iframePageSource);
        console.log("offline saved")
        const captchaAudioChallengePathRegex = /captchaAudioChallengePath: '(.*?)'/;
        // exec() returns null when the pattern is absent; the [1] access then
        // throws and control jumps to the catch block below.
        const audioFile = captchaAudioChallengePathRegex.exec(iframePageSource)[1];
        console.log(audioFile);
        const audioText = await getAudioText(audioFile)
        await iframe.waitForSelector('.audio-captcha-play-button', { timeout: 5000 });
        await iframe.click('.audio-captcha-play-button');
        await new Promise(resolve => setTimeout(resolve, 5000));
        await iframe.waitForSelector('.audio-captcha-inputs', { timeout: 5000 });
        console.log("wait completed")
        const captchaValue = audioText.trim().split('');
        const inputElements = await iframe.$$('#captcha__audio > div.audio-captcha-input-container > input');
        await inputElements[0].focus();
        // One character per input element. NOTE(review): nothing checks that
        // captchaValue and inputElements have the same length.
        for (let i = 0; i < captchaValue.length; i++) {
            await typeInto(inputElements[i], captchaValue[i], config);
            // await page.keyboard.type(captchaValue[i],{delay:1000})
            console.log("Typed value:", captchaValue[i]);
            await new Promise(resolve => setTimeout(resolve, 1000));
            // if (i==4){
            //     await new Promise(resolve => setTimeout(resolve, 1000000));
            // }
        }
        await new Promise(resolve => setTimeout(resolve, 5000));
        console.log("Completed typing all values");

        await new Promise(resolve => setTimeout(resolve, 10000));
        // NOTE(review): no click happens before this message is logged.
        console.log("clicked")
        // NOTE(review): the callback only calls console.log and returns nothing,
        // so `datadomcookie` is always undefined.
        var datadomcookie = await page.evaluate(() => {
            const cookies = document.cookie.split('; ');
            const datadomeCookie = cookies.find(cookie => cookie.startsWith('datadome='));
            console.log("datadom", datadomeCookie ? datadomeCookie.split('=')[1] : null); // return the value or null if not found
        });
        // return datadomcookie;

    } catch (error) {
        console.error('Error getting audio file:', error);
        // return null;
    }
} else {
    console.error('Could not switch to iframe to get audio file');
    // return null;
}

}

package.json

{ "name": "datadome", "version": "1.0.0", "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], "author": "", "license": "ISC", "description": "", "dependencies": { "@forad/puppeteer-humanize": "^1.1.6", "express": "^4.21.1", "puppeteer": "^23.6.0" } }

image githubone image
nwebson commented 1 day ago

That's quite a lot of code. Could you try to extract the specific part, or make a separate minimal example that shows your issue?