joeyism / linkedin_scraper

A library that scrapes Linkedin for user data
GNU General Public License v3.0
1.98k stars 555 forks source link

Connection refused #164

Closed SaiChrisZHANG closed 1 year ago

SaiChrisZHANG commented 1 year ago

Hi all,

After using the package to retrieve information from a single profile, I couldn't retrieve a second one: the next call failed with the error below. Has anybody experienced this type of issue? Any advice would be highly appreciated!


ConnectionRefusedError Traceback (most recent call last) /opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py in _new_conn(self) 173 try: --> 174 conn = connection.create_connection( 175 (self._dns_host, self.port), self.timeout, **extra_kw

/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options) 94 if err is not None: ---> 95 raise err 96

/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options) 84 sock.bind(source_address) ---> 85 sock.connect(sa) 86 return sock

ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

NewConnectionError Traceback (most recent call last) /opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 702 # Make the request on the httplib connection object. --> 703 httplib_response = self._make_request( 704 conn,

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, httplib_request_kw) 397 else: --> 398 conn.request(method, url, httplib_request_kw) 399

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py in request(self, method, url, body, headers) 238 headers["User-Agent"] = _get_default_user_agent() --> 239 super(HTTPConnection, self).request(method, url, body=body, headers=headers) 240

/opt/anaconda3/lib/python3.9/http/client.py in request(self, method, url, body, headers, encode_chunked) 1284 """Send a complete request to the server.""" -> 1285 self._send_request(method, url, body, headers, encode_chunked) 1286

/opt/anaconda3/lib/python3.9/http/client.py in _send_request(self, method, url, body, headers, encode_chunked) 1330 body = _encode(body, 'body') -> 1331 self.endheaders(body, encode_chunked=encode_chunked) 1332

/opt/anaconda3/lib/python3.9/http/client.py in endheaders(self, message_body, encode_chunked) 1279 raise CannotSendHeader() -> 1280 self._send_output(message_body, encode_chunked=encode_chunked) 1281

/opt/anaconda3/lib/python3.9/http/client.py in _send_output(self, message_body, encode_chunked) 1039 del self._buffer[:] -> 1040 self.send(msg) 1041

/opt/anaconda3/lib/python3.9/http/client.py in send(self, data) 979 if self.auto_open: --> 980 self.connect() 981 else:

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py in connect(self) 204 def connect(self): --> 205 conn = self._new_conn() 206 self._prepare_conn(conn)

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py in _new_conn(self) 185 except SocketError as e: --> 186 raise NewConnectionError( 187 self, "Failed to establish a new connection: %s" % e

NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7fe8f28dbca0>: Failed to establish a new connection: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

MaxRetryError Traceback (most recent call last) /var/folders/65/z4617xxn6s99cjxp0ssspqhw0000gn/T/ipykernel_58880/2567701434.py in 1 email = "user@example.com" 2 password = "********" ----> 3 actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal 4 person = Person("https://www.linkedin.com/in/ilyalukibanov/", driver=driver)

~/.local/lib/python3.9/site-packages/linkedin_scraper/actions.py in login(driver, email, password, cookie, timeout) 21 email, password = __prompt_email_password() 22 ---> 23 driver.get("https://www.linkedin.com/login") 24 element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "username"))) 25

~/.local/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py in get(self, url) 447 def get(self, url: str) -> None: 448 """Loads a web page in the current browser session.""" --> 449 self.execute(Command.GET, {"url": url}) 450 451 @property

~/.local/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params) 436 params["sessionId"] = self.session_id 437 --> 438 response = self.command_executor.execute(driver_command, params) 439 if response: 440 self.error_handler.check_response(response)

~/.local/lib/python3.9/site-packages/selenium/webdriver/remote/remote_connection.py in execute(self, command, params) 288 data = utils.dump_json(params) 289 url = f"{self._url}{path}" --> 290 return self._request(command_info[0], url, body=data) 291 292 def _request(self, method, url, body=None):

~/.local/lib/python3.9/site-packages/selenium/webdriver/remote/remote_connection.py in _request(self, method, url, body) 309 310 if self.keep_alive: --> 311 response = self._conn.request(method, url, body=body, headers=headers) 312 statuscode = response.status 313 else:

/opt/anaconda3/lib/python3.9/site-packages/urllib3/request.py in request(self, method, url, fields, headers, urlopen_kw) 76 ) 77 else: ---> 78 return self.request_encode_body( 79 method, url, fields=fields, headers=headers, urlopen_kw 80 )

/opt/anaconda3/lib/python3.9/site-packages/urllib3/request.py in request_encode_body(self, method, url, fields, headers, encode_multipart, multipart_boundary, urlopen_kw) 168 extra_kw.update(urlopen_kw) 169 --> 170 return self.urlopen(method, url, extra_kw)

/opt/anaconda3/lib/python3.9/site-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, kw) 374 response = conn.urlopen(method, url, kw) 375 else: --> 376 response = conn.urlopen(method, u.request_uri, **kw) 377 378 redirect_location = redirect and response.get_redirect_location()

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 813 "Retrying (%r) after connection broken by '%r': %s", retries, err, url 814 ) --> 815 return self.urlopen( 816 method, 817 url,

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 813 "Retrying (%r) after connection broken by '%r': %s", retries, err, url 814 ) --> 815 return self.urlopen( 816 method, 817 url,

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 813 "Retrying (%r) after connection broken by '%r': %s", retries, err, url 814 ) --> 815 return self.urlopen( 816 method, 817 url,

/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 785 e = ProtocolError("Connection aborted.", e) 786 --> 787 retries = retries.increment( 788 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] 789 )

/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace) 590 591 if new_retry.is_exhausted(): --> 592 raise MaxRetryError(_pool, url, error or ResponseError(cause)) 593 594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPConnectionPool(host='localhost', port=61614): Max retries exceeded with url: /session/1c9e48ea7e75420dc1e200766494645e/url (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fe8f28dbca0>: Failed to establish a new connection: [Errno 61] Connection refused'))

shahabe commented 1 year ago

I have this problem as well. @SaiChrisZHANG, have you found a solution?

shahabe commented 1 year ago

I think I found the cause. I was creating the Person like this:

person = Person(dest_person, driver=driver)

and I realised that this closes the driver's connection once the scrape is done, so any later call that reuses the same driver (another login or another Person) fails with "Connection refused". The fix is to pass close_on_complete=False when constructing the Person:

person = Person(dest_person, driver=driver, close_on_complete=False)