Anorov / cloudflare-scrape

A Python module to bypass Cloudflare's anti-bot page.
MIT License
3.38k stars 459 forks source link

Max retries exceeded with url #170

Closed quancore closed 6 years ago

quancore commented 6 years ago

Url trying to reach: https://play.esea.net/index.php?s=servers&d=download_replay&id=10882713

code:

user_agent_addition = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'}
    session = requests_retry_session()
     token, user_agent_str = get_cookie_token(url)
     user_agent_addition = {'User-Agent': user_agent_str}
      headers = dict(headers.items() | user_agent_addition.items())
      r = retrieve_file(url, session, token, headers)
def retrieve_file(url, session, token = None, headers={}):
    try:
        r = session.get(url, stream=True, headers=headers, timeout = constants.timeout, cookies = {'__cfduid': token['__cfduid']})
        r.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        logger.exception("Http Error: {}".format(errh))
    except requests.exceptions.ConnectionError as errc:
        logger.exception("Error Connecting: {}".format(errc))
    except requests.exceptions.Timeout as errt:
        logger.exception("Timeout Error: {}".format(errt))
    except requests.exceptions.RequestException as err:
        logger.exception("Error: {}".format(err))
    finally:
        return r
def requests_retry_session(
    retries=constants.download_rety_count,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 503, 504),
    session=None
):
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    return session

error:

raise MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='play.esea.net', port=443): Max retries exceeded with url: /index.php?s=servers&d=download_replay&id=10882713 (Caused by ResponseError('too many 503 error responses',))

Anorov commented 6 years ago

You need to pass the cf_clearance cookie, too, not just __cfduid.

On Fri, Sep 7, 2018, 3:09 PM quancore notifications@github.com wrote:

Url trying to reach: https://play.esea.net/index.php?s=servers&d=download_replay&id=10882713

code:

user_agent_addition = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'} session = requests_retry_session() token, user_agent_str = get_cookie_token(url) user_agent_addition = {'User-Agent': user_agent_str} headers = dict(headers.items() | user_agent_addition.items()) r = retrieve_file(url, session, token, headers)

def retrieve_file(url, session, token = None, headers={}): try: r = session.get(url, stream=True, headers=headers, timeout = constants.timeout, cookies = {'cfduid': token['cfduid']}) r.raise_for_status() except requests.exceptions.HTTPError as errh: logger.exception("Http Error: {}".format(errh)) except requests.exceptions.ConnectionError as errc: logger.exception("Error Connecting: {}".format(errc)) except requests.exceptions.Timeout as errt: logger.exception("Timeout Error: {}".format(errt)) except requests.exceptions.RequestException as err: logger.exception("Error: {}".format(err)) finally: return r

def requests_retry_session( retries=constants.download_rety_count, backoff_factor=0.3, status_forcelist=constants.status_forcelist, session=None ): session = session or requests.Session() retry = Retry( total=retries, read=retries, connect=retries, backoff_factor=backoff_factor, status_forcelist=status_forcelist, ) adapter = HTTPAdapter(max_retries=retry) session.mount('http://', adapter) session.mount('https://', adapter)

return session

error:

raise MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='play.esea.net', port=443): Max retries exceeded with url: /index.php?s=servers&d=download_replay&id=10882713 (Caused by ResponseError('too many 503 error responses',))

— You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub https://github.com/Anorov/cloudflare-scrape/issues/170, or mute the thread https://github.com/notifications/unsubscribe-auth/AA5FI8dN02_ytA6kWnW4trL3oqyQAYifks5uYnAWgaJpZM4WezJi .

quancore commented 6 years ago

Thanks for your ınstant answer. I wıll try on Monday and gıve a feedback.

quancore commented 6 years ago

Works like a charm.