ocean-data-factory-sweden / kso

Notebooks to upload/download marine footage, connect to a citizen science project, train machine learning models and publish marine biological observations.
GNU General Public License v3.0
4 stars 12 forks source link

ConnectionResetError in tutorial 8 when aggregating frames in preparation for model training #351

Closed Bergylta closed 5 months ago

Bergylta commented 5 months ago

🐛 Bug

To Reproduce (REQUIRED)

Species: European green crab, Black goby, Round goby, Goldsinny wrasse Threshold: 0.2 Input:

# Run the preparation script
mlp.prepare_dataset(
    agg_df=pp.aggregated_zoo_classifications,
    out_path=output_folder.selected,
    img_size=(720, 540),
    perc_test=percentage_test.value,
)

Output:

ConnectionResetError                      Traceback (most recent call last)
File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    714 # Make the request on the httplib connection object.
--> 715 httplib_response = self._make_request(
    716     conn,
    717     method,
    718     url,
    719     timeout=timeout_obj,
    720     body=body,
    721     headers=headers,
    722     chunked=chunked,
    723 )
    725 # If we're going to release the connection in ``finally:``, then
    726 # the response doesn't need to know about the connection. Otherwise
    727 # it will also try to release it and we'll have a double-release
    728 # mess.

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:404, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    403 try:
--> 404     self._validate_conn(conn)
    405 except (SocketTimeout, BaseSSLError) as e:
    406     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:1058, in HTTPSConnectionPool._validate_conn(self, conn)
   1057 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
-> 1058     conn.connect()
   1060 if not conn.is_verified:

File /usr/local/lib/python3.8/dist-packages/urllib3/connection.py:419, in HTTPSConnection.connect(self)
    417     context.load_default_certs()
--> 419 self.sock = ssl_wrap_socket(
    420     sock=conn,
    421     keyfile=self.key_file,
    422     certfile=self.cert_file,
    423     key_password=self.key_password,
    424     ca_certs=self.ca_certs,
    425     ca_cert_dir=self.ca_cert_dir,
    426     ca_cert_data=self.ca_cert_data,
    427     server_hostname=server_hostname,
    428     ssl_context=context,
    429     tls_in_tls=tls_in_tls,
    430 )
    432 # If we're using all defaults and the connection
    433 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
    434 # for the host.

File /usr/local/lib/python3.8/dist-packages/urllib3/util/ssl_.py:449, in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
    448 if send_sni:
--> 449     ssl_sock = _ssl_wrap_socket_impl(
    450         sock, context, tls_in_tls, server_hostname=server_hostname
    451     )
    452 else:

File /usr/local/lib/python3.8/dist-packages/urllib3/util/ssl_.py:493, in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
    492 if server_hostname:
--> 493     return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
    494 else:

File /usr/lib/python3.8/ssl.py:500, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
    494 def wrap_socket(self, sock, server_side=False,
    495                 do_handshake_on_connect=True,
    496                 suppress_ragged_eofs=True,
    497                 server_hostname=None, session=None):
    498     # SSLSocket class handles server_hostname encoding before it calls
    499     # ctx._wrap_socket()
--> 500     return self.sslsocket_class._create(
    501         sock=sock,
    502         server_side=server_side,
    503         do_handshake_on_connect=do_handshake_on_connect,
    504         suppress_ragged_eofs=suppress_ragged_eofs,
    505         server_hostname=server_hostname,
    506         context=self,
    507         session=session
    508     )

File /usr/lib/python3.8/ssl.py:1069, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
   1068             raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1069         self.do_handshake()
   1070 except (OSError, ValueError):

File /usr/lib/python3.8/ssl.py:1338, in SSLSocket.do_handshake(self, block)
   1337         self.settimeout(None)
-> 1338     self._sslobj.do_handshake()
   1339 finally:

ConnectionResetError: [Errno 104] Connection reset by peer

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
File /usr/local/lib/python3.8/dist-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:799, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    797     e = ProtocolError("Connection aborted.", e)
--> 799 retries = retries.increment(
    800     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    801 )
    802 retries.sleep()

File /usr/local/lib/python3.8/dist-packages/urllib3/util/retry.py:550, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    549 if read is False or not self._is_method_retryable(method):
--> 550     raise six.reraise(type(error), error, _stacktrace)
    551 elif read is not None:

File /usr/local/lib/python3.8/dist-packages/urllib3/packages/six.py:769, in reraise(tp, value, tb)
    768 if value.__traceback__ is not tb:
--> 769     raise value.with_traceback(tb)
    770 raise value

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    714 # Make the request on the httplib connection object.
--> 715 httplib_response = self._make_request(
    716     conn,
    717     method,
    718     url,
    719     timeout=timeout_obj,
    720     body=body,
    721     headers=headers,
    722     chunked=chunked,
    723 )
    725 # If we're going to release the connection in ``finally:``, then
    726 # the response doesn't need to know about the connection. Otherwise
    727 # it will also try to release it and we'll have a double-release
    728 # mess.

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:404, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    403 try:
--> 404     self._validate_conn(conn)
    405 except (SocketTimeout, BaseSSLError) as e:
    406     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.

File /usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py:1058, in HTTPSConnectionPool._validate_conn(self, conn)
   1057 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
-> 1058     conn.connect()
   1060 if not conn.is_verified:

File /usr/local/lib/python3.8/dist-packages/urllib3/connection.py:419, in HTTPSConnection.connect(self)
    417     context.load_default_certs()
--> 419 self.sock = ssl_wrap_socket(
    420     sock=conn,
    421     keyfile=self.key_file,
    422     certfile=self.cert_file,
    423     key_password=self.key_password,
    424     ca_certs=self.ca_certs,
    425     ca_cert_dir=self.ca_cert_dir,
    426     ca_cert_data=self.ca_cert_data,
    427     server_hostname=server_hostname,
    428     ssl_context=context,
    429     tls_in_tls=tls_in_tls,
    430 )
    432 # If we're using all defaults and the connection
    433 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
    434 # for the host.

File /usr/local/lib/python3.8/dist-packages/urllib3/util/ssl_.py:449, in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
    448 if send_sni:
--> 449     ssl_sock = _ssl_wrap_socket_impl(
    450         sock, context, tls_in_tls, server_hostname=server_hostname
    451     )
    452 else:

File /usr/local/lib/python3.8/dist-packages/urllib3/util/ssl_.py:493, in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
    492 if server_hostname:
--> 493     return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
    494 else:

File /usr/lib/python3.8/ssl.py:500, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
    494 def wrap_socket(self, sock, server_side=False,
    495                 do_handshake_on_connect=True,
    496                 suppress_ragged_eofs=True,
    497                 server_hostname=None, session=None):
    498     # SSLSocket class handles server_hostname encoding before it calls
    499     # ctx._wrap_socket()
--> 500     return self.sslsocket_class._create(
    501         sock=sock,
    502         server_side=server_side,
    503         do_handshake_on_connect=do_handshake_on_connect,
    504         suppress_ragged_eofs=suppress_ragged_eofs,
    505         server_hostname=server_hostname,
    506         context=self,
    507         session=session
    508     )

File /usr/lib/python3.8/ssl.py:1069, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
   1068             raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1069         self.do_handshake()
   1070 except (OSError, ValueError):

File /usr/lib/python3.8/ssl.py:1338, in SSLSocket.do_handshake(self, block)
   1337         self.settimeout(None)
-> 1338     self._sslobj.do_handshake()
   1339 finally:

ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
File /usr/src/app/kso-dev/kso_utils/project.py:1243, in MLProjectProcessor.prepare_dataset.<locals>.on_button_clicked(b)
   1241 self.species_of_interest = species_list.value
   1242 # code for prepare dataset for machine learning
-> 1243 self.modules["yolo_utils"].frame_aggregation(
   1244     project=self.project,
   1245     server_connection=self.server_connection,
   1246     db_connection=self.db_connection,
   1247     out_path=out_path,
   1248     perc_test=perc_test,
   1249     class_list=self.species_of_interest,
   1250     img_size=img_size,
   1251     remove_nulls=remove_nulls,
   1252     track_frames=track_frames,
   1253     n_tracked_frames=n_tracked_frames,
   1254     agg_df=agg_df,
   1255 )

File /usr/src/app/kso-dev/kso_utils/yolo_utils.py:741, in frame_aggregation(project, server_connection, db_connection, out_path, perc_test, class_list, img_size, out_format, remove_nulls, track_frames, n_tracked_frames, agg_df)
    732         for box in bboxes[named_tuple]:
    733             print(filename)
    734             new_rows.append(
    735                 (
    736                     grouped_fields[-1],  # species_id
    737                     filename,
    738                     PIL.Image.open(requests.get(filename, stream=True).raw).size[0]
    739                     if link_bool
    740                     else PIL.Image.open(filename).size[0],
--> 741                     PIL.Image.open(requests.get(filename, stream=True).raw).size[1]
    742                     if link_bool
    743                     else PIL.Image.open(filename).size[1],
    744                 )
    745                 + box
    746             )
    748 ### Final export step
    749 if movie_bool:
    750     # Export full rows

File /usr/local/lib/python3.8/dist-packages/requests/api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64 
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File /usr/local/lib/python3.8/dist-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File /usr/local/lib/python3.8/dist-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /usr/local/lib/python3.8/dist-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /usr/local/lib/python3.8/dist-packages/requests/adapters.py:501, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
   (...)
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:
--> 501     raise ConnectionError(err, request=request)
    503 except MaxRetryError as e:
    504     if isinstance(e.reason, ConnectTimeoutError):
    505         # TODO: Remove this in 3.0.0: see #2811

ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

Expected behavior

A clear and concise description of what you expected to happen.

Environment

If applicable, add screenshots to help explain your problem.

Additional context

Add any other context about the problem here.

jannesgg commented 5 months ago

Rate limiting from Zooniverse. Try to limit requests.