vladkens / twscrape

2024! Twitter API scraper with authorization support. Allows you to scrape search results, users' profiles (followers/following), tweets (favoriters/retweeters) and more.
https://pypi.org/project/twscrape/
MIT License
784 stars 102 forks source link

Proposal: Automatic captcha solving #196

Open ErSauravAdhikari opened 1 month ago

ErSauravAdhikari commented 1 month ago

Integration with the CAPSOLVER_API to solve captchas automatically

Reference:

The bot referenced above solves the captcha automatically.

This is the portion of code from the above package that solves the captcha:

    async def unlock(self):
        """Try to unlock a locked account by walking the captcha unlock flow.

        Does nothing unless ``self.account.status`` is ``"LOCKED"``.
        Otherwise the method:

        1. Fetches ``self._CAPTCHA_URL`` and parses the page with
           ``parse_unlock_html``.
        2. Confirms the intermediate "delete" and "start"/"finish" steps
           when the parsed page reports the corresponding buttons.
        3. While the page still reports ``needs_unlock``, solves a
           FunCaptcha (through the session proxy when one is configured)
           and submits the resulting token, for at most
           ``self.max_unlock_attempts`` attempts. Failed solves are logged
           as warnings and count as an attempt.
        4. Calls ``self.establish_status()`` once the loop is over,
           whether or not the unlock succeeded.
        """
        if self.account.status != "LOCKED":
            return

        _, html = await self.request("GET", self._CAPTCHA_URL, bearer=False)
        (
            authenticity_token,
            assignment_token,
            needs_unlock,
            start_button,
            finish_button,
            delete_button,
        ) = parse_unlock_html(html)

        if delete_button:
            _, html = await self._confirm_unlock(
                authenticity_token, assignment_token
            )
            (
                authenticity_token,
                assignment_token,
                needs_unlock,
                start_button,
                finish_button,
                _,
            ) = parse_unlock_html(html)

        if start_button or finish_button:
            _, html = await self._confirm_unlock(
                authenticity_token, assignment_token
            )
            (
                authenticity_token,
                assignment_token,
                needs_unlock,
                _,
                _,
                _,
            ) = parse_unlock_html(html)

        # Solver task parameters; the task type depends on whether the
        # solver has to go through the session's proxy.
        funcaptcha = {
            "api_key": self.capsolver_api_key,
            "websiteURL": self._CAPTCHA_URL,
            "websitePublicKey": self._CAPTCHA_SITE_KEY,
        }
        if self._session.proxy is not None:
            funcaptcha["captcha_type"] = FunCaptchaTypeEnm.FunCaptchaTask
            funcaptcha["proxyType"] = self._session.proxy.protocol
            funcaptcha["proxyAddress"] = self._session.proxy.host
            funcaptcha["proxyPort"] = self._session.proxy.port
            funcaptcha["proxyLogin"] = self._session.proxy.login
            funcaptcha["proxyPassword"] = self._session.proxy.password
        else:
            funcaptcha["captcha_type"] = FunCaptchaTypeEnm.FunCaptchaTaskProxyLess

        attempt = 1
        while needs_unlock and attempt <= self.max_unlock_attempts:
            solution = await FunCaptcha(**funcaptcha).aio_captcha_handler()
            if solution.errorId:
                # A failed solve still consumes one attempt.
                logger.warning(
                    f"(auth_token={self.account.hidden_auth_token}, id={self.account.id}, username={self.account.username})"
                    f" Failed to solve funcaptcha:"
                    f"\n\tUnlock attempt: {attempt}/{self.max_unlock_attempts}"
                    f"\n\tError ID: {solution.errorId}"
                    f"\n\tError code: {solution.errorCode}"
                    f"\n\tError description: {solution.errorDescription}"
                )
                attempt += 1
                continue

            token = solution.solution["token"]
            response, html = await self._confirm_unlock(
                authenticity_token,
                assignment_token,
                verification_string=token,
            )

            # Landing on the home page ends the flow; nothing left to parse.
            if response.url == "https://twitter.com/?lang=en":
                break

            (
                authenticity_token,
                assignment_token,
                needs_unlock,
                _,
                finish_button,
                _,
            ) = parse_unlock_html(html)

            if finish_button:
                _, html = await self._confirm_unlock(
                    authenticity_token, assignment_token
                )
                (
                    authenticity_token,
                    assignment_token,
                    needs_unlock,
                    _,
                    _,
                    _,
                ) = parse_unlock_html(html)

            attempt += 1

        await self.establish_status()
    async def _confirm_unlock(
        self,
        authenticity_token: str,
        assignment_token: str,
        verification_string: str = None,
    ) -> tuple[requests.Response, str]:
        """POST one step of the unlock form to ``self._CAPTCHA_URL``.

        The form always carries both tokens plus ``lang`` and ``flow``.
        When a truthy ``verification_string`` is given, it is added together
        with ``language_code``.

        Returns whatever ``self.request`` returns for the POST.
        """
        form = dict(
            authenticity_token=authenticity_token,
            assignment_token=assignment_token,
            lang="en",
            flow="",
        )
        if verification_string:
            form.update(
                verification_string=verification_string,
                language_code="en",
            )

        result = await self.request(
            "POST", self._CAPTCHA_URL, data=form, bearer=False
        )
        return result
ErSauravAdhikari commented 1 month ago

Credits: @alenkimov is the maintainer of the project that the above code comes from.