wkeeling / selenium-wire

Extends Selenium's Python bindings to give you the ability to inspect requests made by the browser.
MIT License
1.9k stars 254 forks source link

Address already in use #453

Closed cooclzw closed 2 years ago

cooclzw commented 2 years ago

def get_driver(executor_url, versions, headless, proxy, port):
    """Create a Selenium Wire ``Remote`` Chrome driver that talks through the local proxy.

    Args:
        executor_url: URL of the remote WebDriver endpoint (``command_executor``).
        versions: Candidate browser versions; one entry of
            ``get_list(versions)`` is picked at random.
        headless: When truthy, Chrome is started with ``--headless``,
            ``--disable-gpu`` and ``--no-sandbox``.
        proxy: Optional upstream proxy URL. When truthy it is used for both
            ``http`` and ``https`` in Selenium Wire's ``proxy`` option.
        port: Port passed to Selenium Wire as its ``port`` option and used as
            the browser's proxy port.

    Returns:
        The created driver, or ``None`` if creating it raised (the error is
        logged as a warning rather than propagated).
    """
    ip = get_ip()
    logger.info(f"local ip: {ip}")

    # The posted snippet also contained
    # `executor_url = kwargs.get("selenium_executor_url")` at this point, but
    # `kwargs` is not defined in this function, so the parameter is used as passed.

    ver = random.choice(get_list(versions))
    proxy_address = f"{ip}:{port}"
    capabilities = {
        "browserName": "chrome",
        "browserVersion": ver,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": False,
        },
        "proxy": {
            "proxyType": "MANUAL",
            "httpProxy": proxy_address,
            "sslProxy": proxy_address,
        },
    }

    # auto_config is off, so the browser is pointed at the Selenium Wire proxy
    # explicitly (capabilities above and --proxy-server below).
    sw_options = {
        'suppress_connection_errors': False,
        'auto_config': False,
        'port': port,
        'addr': ip,
        'request_storage': 'memory',
    }
    logger.info(f'ip:{proxy}')
    if proxy:
        sw_options['proxy'] = {
            'http': proxy,
            'https': proxy,
            'no_proxy': 'localhost,127.0.0.1',
        }

    chrome_options = webdriver.ChromeOptions()
    # All prefs are set in ONE call: add_experimental_option stores the value
    # under the given name, so a second 'prefs' call would replace the first
    # dict instead of extending it.
    chrome_options.add_experimental_option('prefs', {
        "credentials_enable_service": False,
        "profile.password_manager_enabled": False,
        'profile.default_content_setting_values': {
            'notifications': 2,
        },
        "network.proxy.socks_remote_dns": True,
        "webrtc.ip_handling_policy": "disable_non_proxied_udp",
        "webrtc.multiple_routes_enabled": False,
        "webrtc.nonproxied_udp_enabled": False,
    })
    user_agent = f'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_{random.randint(11,15)}_{random.randint(1,7)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(80,94)}.0.{random.randint(4103,4606)}.54 Safari/537.36'
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('user-agent=' + user_agent)
    chrome_options.add_argument(f'--proxy-server={proxy_address}')
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--disable-infobars')
    chrome_options.add_argument('--blink-settings=imagesEnabled=false')
    if headless:
        chrome_options.add_argument("--headless")
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')

    driver = None
    try:
        logger.info('webdriver init start')
        driver = webdriver.Remote(
            command_executor=executor_url,
            desired_capabilities=capabilities,
            seleniumwire_options=sw_options,
            options=chrome_options,
        )
        logger.info('webdriver init finished')
    except Exception as e:
        # Best-effort by design: callers check the return value for None.
        # NOTE(review): if this fails after the Selenium Wire proxy has
        # started, `port` may remain in use; a retry should use another port.
        logger.warning('webdriver init failed' + str(e))
    return driver

# Repeatedly create a driver, use it, and shut it down again.
while True:
    driver = get_driver(executor_url, versions, headless, proxy, port)
    ...  # work with the driver (elided in the original post)
    if driver:  # get_driver() returns None when initialisation failed
        driver.quit()

On Debian, if the webdriver fails to start, I get the error "Address already in use". It looks like mitmproxy is still listening on port 8087.

2021-11-24 10:22:19.084 | INFO | MainClass:get_driver:146 - webdriver init start
2021-11-24 10:22:19.094 | WARNING | MainClass:get_driver:155 - webdriver init failed HTTPConnectionPool(host='127.0.0.1', port=4444): Max retries exceeded with url: /wd/hub/session (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10cad36d0>: Failed to establish a new connection: [Errno 61] Connection refused'))
2021-11-24 10:22:19.094 | INFO | main:main:73 - generate cookie finished
2021-11-24 10:22:23.099 | INFO | main:main:70 - start cookie worker
2021-11-24 10:22:23.100 | INFO | MainClass:get_driver:146 - webdriver init start
2021-11-24 10:22:23.104 | WARNING | MainClass:get_driver:155 - webdriver init failed. Error starting proxy server: OSError(48, 'Address already in use')

wkeeling commented 2 years ago

Thanks for this.

Can you show the code that selects the port number that gets passed to get_driver()?

driver = get_driver(executor_url, versions, headless, proxy,port)  # <-- how is the port number created?
cooclzw commented 2 years ago

I set it to 8087.

cooclzw commented 2 years ago

Even if I set a random port, the port is still being listened on. It seems that if Selenium fails to start, mitmproxy does not quit and keeps listening on the port:

COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
Python 2814 xxx 8u IPv6 xxx 0t0 TCP 192.168.10.16:8087 (LISTEN)
Python 2814 xxx 8u IPv6 xxx 0t0 TCP 192.168.10.16:8086 (LISTEN)
Python 2814 xxx 8u IPv6 xxx 0t0 TCP 192.168.10.16:8085 (LISTEN)

wkeeling commented 2 years ago

If you're running in a loop, you'll need to ensure that the port you pass to Selenium Wire is definitely not in use. Even if Selenium Wire is shut down gracefully, the port may not have fully closed by the time the next loop iteration starts. It often depends on the underlying OS.

You can use code such as this to ensure that a free port is always selected:

import socket
from contextlib import closing

def get_free_port(start=8087, stop=8187, host='0.0.0.0'):
    """Return the first port in ``range(start, stop)`` that can be bound on *host*.

    Each candidate is tested by actually binding a throwaway TCP socket to
    it. A connect-based probe only shows whether something accepts
    connections on the probed address, so it can miss a listener bound to a
    different interface.

    Args:
        start: First port to try (inclusive).
        stop: End of the range (exclusive).
        host: IPv4 address to bind for the check; the default covers all
            IPv4 interfaces.

    Returns:
        The first port number that could be bound.

    Raises:
        RuntimeError: If no port in the range could be bound.

    Note:
        The probe socket is closed before returning, so another process can
        still take the port before the caller uses it.
    """
    for port in range(start, stop):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            try:
                sock.bind((host, port))
            except OSError:
                # In use (or otherwise not bindable) - try the next one.
                continue
            return port
    raise RuntimeError(f'no free port on {host!r} in range {start}-{stop - 1}')

# Pick a currently unused port first, then hand it to get_driver()
port = get_free_port()
driver = get_driver(executor_url, versions, headless, proxy, port)