Xewdy444 / Playwright-reCAPTCHA

A Python library for solving reCAPTCHA v2 and v3 with Playwright
https://pypi.org/project/playwright-recaptcha/
MIT License
276 stars 38 forks source link

No unchecked reCAPTCHA boxes were found #65

Closed Lyfhael closed 11 months ago

Lyfhael commented 11 months ago

Hey, I'm trying to bypass the reCAPTCHA on this web page, but I feel like I'm not using your library properly: it says no unchecked reCAPTCHA boxes were found. In the page source, the captcha element has data-version="v2_invisible" — maybe the library can't find it because it's invisible?

But I saw your comment here: https://github.com/Xewdy444/Playwright-reCAPTCHA/issues/6#issuecomment-1407013307, and in my script I do click the button before trying to solve the captcha.


def scrape_whoxy(email, retry_count=7):
    """Run a whoxy.com reverse-WHOIS demo lookup for *email* and parse it.

    Each attempt launches a fresh non-headless Chromium instance with a
    randomly chosen user agent, fills in the demo form, submits it, solves
    the reCAPTCHA challenge if one is shown, and reads the JSON rendered in
    the page's ``<pre>`` element.

    Args:
        email: Email address to enter as the search keyword.
        retry_count: Maximum number of attempts before giving up.

    Returns:
        The result of ``parse_whoxy(json_data, email)`` from the first
        successful attempt, or ``None`` if every attempt failed (or
        ``retry_count`` is not positive).
    """
    # Built once; a random entry is picked for every attempt.
    user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Linux; Android 11; Pixel 3 XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.100.0",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
    )

    for attempt in range(retry_count):
        try:
            with sync_playwright() as p:
                # NOTE: proxy support (random choice from PROXIES, socks5) was
                # stubbed out here; re-add it via the launch options if needed.
                browser = p.chromium.launch(headless=False)  # Non-headless browser
                try:
                    context = browser.new_context(
                        user_agent=random.choice(user_agents)
                    )

                    # Add other stealth techniques here
                    page = context.new_page()
                    stealth_sync(page)
                    page.goto("https://www.whoxy.com/reverse-whois/demo.php")
                    page.wait_for_selector('[name="search_identifier"]')
                    # Debug aid: puts the page HTML on the system clipboard.
                    pyperclip.copy(page.content())
                    page.select_option('select[name="search_identifier"]', 'email')
                    page.fill('input#search_keyword', email)
                    page.evaluate(
                        "document.querySelector('#result_mode').selectedIndex = 0"
                    )
                    time.sleep(0.2)
                    page.click('.g-recaptcha.btn_1')
                    # Let the submit settle before inspecting the page for a
                    # challenge.
                    page.wait_for_load_state("networkidle")

                    with recaptchav2.SyncSolver(page) as solver:
                        # The form uses an invisible reCAPTCHA, so a challenge
                        # may not appear at all; only solve when one is shown
                        # instead of failing with "No unchecked reCAPTCHA
                        # boxes were found".
                        if solver.recaptcha_is_visible():
                            token = solver.solve_recaptcha(wait=True)
                            print(token)

                    started = time.time()
                    try:
                        # NOTE(review): 1000 ms is a short budget for the
                        # result to render - confirm it is sufficient.
                        page.wait_for_selector("pre", timeout=1000)
                    except Exception:
                        page.screenshot(path="scrapie_error.png")
                        raise
                    print(time.time() - started)
                    page.screenshot(path="scrapie_success.png")

                    # Read the rendered text rather than regex-matching the raw
                    # HTML: the markup may contain escaped entities that are
                    # not valid JSON, and a failed regex match would surface
                    # as an opaque AttributeError.
                    json_data = json.loads(page.locator("pre").inner_text())
                finally:
                    # Close the browser on failures as well as on success.
                    browser.close()
                return parse_whoxy(json_data, email)
        except Exception as e:
            # Top-level retry boundary: report the failure and try again.
            print(f"Attempt {attempt + 1} failed in scrape_whoxy: {e}")
            # Optionally, add a delay here before retrying

    print("All attempts failed in scrape_whoxy")
    return None
Lyfhael commented 11 months ago

I'm blind, this fixed it: https://github.com/Xewdy444/Playwright-reCAPTCHA/issues/37

Xewdy444 commented 11 months ago

Here's the best way I found to fill out the form and retrieve the JSON data:

import json

from playwright.sync_api import sync_playwright
from playwright_recaptcha import recaptchav2

# Fill out the whoxy reverse-WHOIS demo form, solve the reCAPTCHA if one is
# shown, and load the JSON printed in the result page's <pre> element.
with sync_playwright() as playwright:
    browser = playwright.firefox.launch(headless=False)
    page = browser.new_page()
    page.goto("https://www.whoxy.com/reverse-whois/demo.php")

    # Choose the search type and the result mode by their visible labels.
    identifier_select = page.locator('select[name="search_identifier"]')
    identifier_select.select_option(label="Email Address =")

    mode_select = page.locator('select[name="result_mode"]')
    mode_select.select_option(label="Default  [100 results, Full Contact Details]")

    # Enter the keyword and submit the form.
    keyword_input = page.locator("#search_keyword")
    keyword_input.fill("xewdy@xewdy.tech")

    submit_button = page.get_by_role("button", name="Reverse Whois Lookup")
    submit_button.click()
    page.wait_for_load_state("networkidle")

    # Only attempt a solve when a reCAPTCHA is actually visible.
    with recaptchav2.SyncSolver(page) as solver:
        challenge_shown = solver.recaptcha_is_visible()

        if challenge_shown:
            solver.solve_recaptcha()

    result_block = page.locator("pre")
    text = result_block.inner_text()
    json_data = json.loads(text)
Lyfhael commented 10 months ago

I hadn't used your method and kept having issues with my code that would pop up randomly. I finally remembered you wrote this, tried it, and it works so well. Thank you again so much <3