Xewdy444 / Playwright-reCAPTCHA

A Python library for solving reCAPTCHA v2 and v3 with Playwright
https://pypi.org/project/playwright-recaptcha/
MIT License
240 stars 33 forks source link

How to work through a proxy correctly? #90

Closed Roboxkin closed 5 months ago

Roboxkin commented 5 months ago

I have already asked a similar question, but I still don’t fully understand how to do this correctly. For example, I have this code:

from playwright.sync_api import sync_playwright
from playwright_recaptcha import recaptchav2
from playwright_recaptcha import recaptchav3
# Placeholder proxy address; replace it with a real "scheme://host:port" value.
proxy = 'https://222.222.222.222:000'


def solve_recaptcha_via_proxy(url):
    """Open ``url`` in Firefox through ``proxy`` and solve its reCAPTCHA v2.

    Args:
        url: Address of the page that hosts the reCAPTCHA.

    Returns:
        The solved token formatted as ``token = "<token>"``.
    """
    with sync_playwright() as playwright:
        # The proxy is applied to the whole browser, so every request the
        # page makes is routed through it.
        browser = playwright.firefox.launch(
            proxy={
                'server': f'{proxy}'
            }
        )
        page = browser.new_page()
        page.goto(url)

        with recaptchav2.SyncSolver(page) as solver:
            token = solver.solve_recaptcha(wait=True)
            return f'token = "{token}"'

But Google always complains about limits. The proxies are constantly changing, yet I have the impression that the captcha recognition does not go through a proxy, so I conclude that I am not connecting to the proxy correctly. I tried

proxy={
    'server': f'{proxy}'
}

and

proxy={
    'http': f'{proxy}'
}

and

proxy={
    'https': f'{proxy}'
}

Please show me how to do it right. Thank you!

Xewdy444 commented 5 months ago

Are you talking about the Google speech recognition API or the reCAPTCHA challenge itself?

Roboxkin commented 5 months ago

Good morning! Thank you very much for the answer! I'm not using the API, just trying to recognize reCAPTCHA

Roboxkin commented 5 months ago

Here is the complete code I am using

from flask import Flask, request
from playwright_recaptcha import recaptchav2, recaptchav3
from fp.fp import FreeProxy
from colorama import Fore
import numpy as np
import sys
import cv2
import re
import os
import random
import string
import cv2 as cv
import pytesseract
import time

# Flask application object that the @app.route decorators below register views on.
app = Flask(__name__)

# Print the application version at import time, prefixed with colorama's Fore.GREEN code.
print(Fore.GREEN + 'App version 1.0.5')

@app.route("/")
def home_page():
    """Serve the plain-text landing page for the root URL."""
    landing_text = " this home page"
    return landing_text

@app.route("/recaptcha", methods=["GET"])
def recaptcha():
    """Solve the reCAPTCHA on the requested page through a free proxy.

    Query parameters:
        capa: reCAPTCHA version to solve, either ``V2`` or ``V3``.
        url: Address of the page that hosts the reCAPTCHA.

    Returns:
        ``token = "<token>"`` on success, ``("Invalid reCAPTCHA version",
        400)`` for an unsupported ``capa`` value, or ``("proxy not found",
        503)`` when no proxy could be obtained.
    """
    # Imported locally because the import list of this script omits it.
    from playwright.sync_api import sync_playwright

    recaptcha_type = str(request.args["capa"])
    url = str(request.args["url"])

    # Reject unknown versions up front; otherwise the view would fall through
    # and return None, which is not a valid Flask response.
    if recaptcha_type not in ('V2', 'V3'):
        return "Invalid reCAPTCHA version", 400

    def proxy():
        """Return a proxy from FreeProxy, or None after 50 failed attempts."""
        max_attempts = 50
        for attempt in range(1, max_attempts + 1):
            try:
                address = FreeProxy(google=True).get()
            except Exception:
                status_fail = Fore.RED + 'FAIL'
                print(f'failed to get proxy {status_fail}')
                # Pause between attempts, but not after the final one.
                if attempt < max_attempts:
                    time.sleep(2)
            else:
                status_ok = Fore.GREEN + 'OK'
                print(f'proxy {address} status {status_ok}')
                return address
        return None

    proxy_s = proxy()

    # Answer with an error instead of calling sys.exit(), which would raise
    # SystemExit inside the request handler.
    if proxy_s is None:
        print(Fore.RED + 'proxy not found')
        return "proxy not found", 503

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch(
            proxy={
                'server': f'{proxy_s}'
            }
        )

        page = browser.new_page()
        page.goto(url)

        if recaptcha_type == 'V2':
            with recaptchav2.SyncSolver(page) as solver:
                token = solver.solve_recaptcha(wait=True)
        else:
            # `wait` is an option of the v2 solver only; the v3 solver is
            # called without it.
            with recaptchav3.SyncSolver(page) as solver:
                token = solver.solve_recaptcha()

        return f'token = "{token}"'

# Start Flask's built-in server on 127.0.0.1:8888 when the file is run directly.
if __name__ == "__main__":
    app.run(host="127.0.0.1", port=8888, debug=False)

I get my proxies from https://pypi.org/project/free-proxy/ and I make the GET request from another application. The request looks like this: http://127.0.0.1:8888/recaptcha?capa=V2&url=https://www.google.com/recaptcha/api2/demo Sorry, I'm not a professional programmer; I'm just learning, and I'm writing through Google Translate.

Screenshot_2 Screenshot_3

Roboxkin commented 5 months ago

I have a thought, and a question to go with it: is it possible that a proxy only supports the “http” protocol, while the “url” I provide in the request uses “https”, so that the proxy does not actually work because of this?

Xewdy444 commented 5 months ago

The problem seems to be that those free proxies aren't anonymous, so your real IP address is being shown. I ran many successful tests when I passed in a Mullvad SOCKS5 proxy. Here is the code I used:

import random

from flask import Flask, request
from fp.fp import FreeProxy
from playwright.sync_api import sync_playwright
from playwright_recaptcha import recaptchav2, recaptchav3

# Desktop browser User-Agent strings; recaptcha() picks one at random for
# each browser context it creates.
USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.2365.92",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edge/44.18363.8131",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0",
)

# Flask application object that the @app.route decorator below registers views on.
app = Flask(__name__)

def get_proxy() -> str:
    """Return a proxy address from FreeProxy, requested with ``anonym=True``."""
    return FreeProxy(anonym=True).get()

@app.route("/recaptcha", methods=["GET"])
def recaptcha() -> "str | tuple[str, int]":
    """Solve the reCAPTCHA on a page and return its token.

    Query parameters:
        version: reCAPTCHA version, ``2`` or ``3`` (defaults to ``2``).
        proxy: Proxy server for the browser context; when omitted, one is
            fetched with ``get_proxy()``.
        url: Address of the page that hosts the reCAPTCHA (required).

    Returns:
        ``token = "<token>"`` on success, or an error message with status
        400 when ``url`` is missing or ``version`` is not 2 or 3.
    """
    url = request.args.get("url")

    if url is None:
        return "URL is required", 400

    # A non-numeric version is a client error, not a server failure.
    try:
        recaptcha_version = int(request.args.get("version", 2))
    except ValueError:
        return "Invalid reCAPTCHA version", 400

    if recaptcha_version not in (2, 3):
        return "Invalid reCAPTCHA version", 400

    # Look up a free proxy only when the caller did not supply one, and only
    # after the other parameters have been validated.
    proxy = request.args.get("proxy")
    if proxy is None:
        proxy = get_proxy()

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch()

        context = browser.new_context(
            user_agent=random.choice(USER_AGENTS), proxy={"server": proxy}
        )

        page = context.new_page()
        page.goto(url)

        if recaptcha_version == 2:
            with recaptchav2.SyncSolver(page) as solver:
                token = solver.solve_recaptcha(wait=True)
        else:
            with recaptchav3.SyncSolver(page) as solver:
                token = solver.solve_recaptcha()

        return f'token = "{token}"'

# Start Flask's built-in server on 127.0.0.1:8888 when the file is run directly.
if __name__ == "__main__":
    app.run(host="127.0.0.1", port=8888, debug=False)
Roboxkin commented 5 months ago

Thank you very much for your help! Everything works correctly now! It's nice that there are good people who help solve the problem. Have a good day! Problem solved and thanks again

Roboxkin commented 5 months ago

I rewrote the code a little and now do the reCAPTCHA recognition inside an error handler, because I make the requests from a third-party application and, when the time to solve the captcha ran out, errors appeared in the console. I’m also trying to solve captchas through different proxies using a for loop. Even though I was not able to solve the captcha after many attempts, it is now clear that the problem is caused by bad proxies and, most importantly, thanks to you my code is now much better. Thank you very much! Sincerely!

This is what the code looks like now:
from flask import Flask, request
from playwright.sync_api import sync_playwright
from playwright_recaptcha import recaptchav2, recaptchav3
from colorama import Fore
from fp.fp import FreeProxy
import random

# Flask application object that the @app.route decorators below register views on.
app = Flask(__name__)

@app.route("/")
def home_page():
    """Serve the plain-text landing page for the root URL."""
    landing_text = "this home page, what did you forget here?"
    return landing_text

@app.route("/recaptcha", methods=["GET"])
def recaptcha():
    """Solve the reCAPTCHA on a page, retrying with up to three free proxies.

    Query parameters:
        capa: reCAPTCHA version to solve, either ``V2`` or ``V3``.
        url: Address of the page that hosts the reCAPTCHA (required).

    Returns:
        ``success token "<token>"`` on success; an error message with status
        400 when ``url`` is missing or ``capa`` is unsupported;
        ``error, proxy not found`` when no usable proxy was obtained; or
        ``ReCaptcha no response`` when every attempt failed.
    """
    # .get() is used so that the explicit checks below produce the 400
    # responses for missing parameters.
    recaptcha_type = request.args.get("capa")
    url = request.args.get("url")

    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.2365.92",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edge/44.18363.8131",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0",
    )

    if url is None:
        return "URL is required", 400

    if recaptcha_type not in ('V2', 'V3'):
        return "Invalid reCAPTCHA version", 400

    def proxy_server():
        """Return a proxy from FreeProxy, or None after 10 failed attempts."""
        for _ in range(10):
            try:
                candidate = FreeProxy(anonym=True, google=True).get()
            except Exception:
                print(Fore.RED + 'Proxy not received')
            else:
                print(Fore.GREEN + f'Proxy {candidate} for the solution was successfully received')
                return candidate
        return None

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch()
        for _ in range(3):
            proxy = proxy_server()

            # proxy_server() returns None when every attempt failed; the
            # length check also rejects strings too short to be a proxy URL.
            if proxy is None or len(proxy) < 10:
                return 'error, proxy not found'

            context = None
            try:
                context = browser.new_context(
                    user_agent=random.choice(USER_AGENTS), proxy={"server": proxy}
                )

                page = context.new_page()
                page.goto(url)

                if recaptcha_type == 'V2':
                    with recaptchav2.SyncSolver(page) as solver:
                        token = solver.solve_recaptcha(wait=True)
                else:
                    with recaptchav3.SyncSolver(page) as solver:
                        token = solver.solve_recaptcha()

                return f'success token "{token}"'
            except Exception:
                # Best effort: a failure with one proxy moves on to the next.
                print(Fore.RED + f'ReCaptcha through proxy {proxy} no response')
            finally:
                # Release the context so failed attempts do not pile up.
                if context is not None:
                    context.close()

        return 'ReCaptcha no response'

# Start Flask's built-in server on 127.0.0.1:8888 when the file is run directly.
if __name__ == "__main__":
    app.run(host="127.0.0.1", port=8888, debug=False)

Screenshot