I tried to back up a large forum as WARC (using `-f warc` as the output format). However, after approximately 1.1 GB had been downloaded, every request started failing with a "Too many open files" error.
It looks like some connections/files are never closed?
WARNING:root:Traceback (most recent call last):
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn
sock = connection.create_connection(
File "/home/username/.local/lib/python3.10/site-packages/urllib3/util/connection.py", line 60, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "/usr/lib/python3.10/socket.py", line 955, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
OSError: [Errno 24] Too many open files
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 793, in urlopen
response = self._make_request(
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 491, in _make_request
raise new_e
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 467, in _make_request
self._validate_conn(conn)
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1099, in _validate_conn
conn.connect()
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 616, in connect
self.sock = sock = self._new_conn()
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 213, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/username/.local/lib/python3.10/site-packages/requests/adapters.py", line 589, in send
File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 847, in urlopen
retries = retries.increment(
File "/home/username/.local/lib/python3.10/site-packages/urllib3/util/retry.py", line 515, in increment
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: [redacted] (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/extractors/common.py", line 360, in _fetch_thread_posts
self.thread_state = yield from self._fetch_thread_page_posts(
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/extractors/common.py", line 439, in _fetch_thread_page_posts
response = self._session.get(state.url)
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 75, in get
response = self.try_get(
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 132, in try_get
response = retrying_get(url, params=params, headers=headers, **kwargs)
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 330, in wrapped_f
return self(f, *args, **kw)
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 467, in __call__
do = self.iter(retry_state=retry_state)
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 368, in iter
result = action(retry_state)
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 410, in exc_check
raise retry_exc.reraise()
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 183, in reraise
raise self.last_attempt.result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/home/username/.local/lib/python3.10/site-packages/tenacity/__init__.py", line 470, in __call__
result = fn(*args, **kwargs)
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 130, in retrying_get
return self._do_get(url, params=params, headers=headers, **kwargs)
File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 164, in _do_get
return self._session.get(
File "/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 602, in get
return self.request("GET", url, **kwargs)
File "/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
File "/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
File "/home/username/.local/lib/python3.10/site-packages/requests/adapters.py", line 622, in send
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: [redacted] (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files'))
Hello,
I tried to back up a large forum as WARC (using `-f warc` as the output format). However, after approximately 1.1 GB had been downloaded, every request started failing with a "Too many open files" error.
It looks like some connections/files are never closed?
WARNING:root:Traceback (most recent call last): File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn sock = connection.create_connection( File "/home/username/.local/lib/python3.10/site-packages/urllib3/util/connection.py", line 60, in create_connection for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): File "/usr/lib/python3.10/socket.py", line 955, in getaddrinfo for res in _socket.getaddrinfo(host, port, family, type, proto, flags): OSError: [Errno 24] Too many open files
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 793, in urlopen response = self._make_request( File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 491, in _make_request raise new_e File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 467, in _make_request self._validate_conn(conn) File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1099, in _validate_conn conn.connect() File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 616, in connect self.sock = sock = self._new_conn() File "/home/username/.local/lib/python3.10/site-packages/urllib3/connection.py", line 213, in _new_conn raise NewConnectionError( urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "/home/username/.local/lib/python3.10/site-packages/requests/adapters.py", line 589, in send File "/home/username/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 847, in urlopen retries = retries.increment( File "/home/username/.local/lib/python3.10/site-packages/urllib3/util/retry.py", line 515, in increment raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: [redacted] (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/home/username/.local/lib/python3.10/site-packages/forum_dl/extractors/common.py", line 360, in _fetch_thread_posts self.thread_state = yield from self._fetch_thread_page_posts( File "/home/username/.local/lib/python3.10/site-packages/forum_dl/extractors/common.py", line 439, in _fetch_thread_page_posts response = self._session.get(state.url) File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 75, in get response = self.try_get( File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 132, in try_get response = retrying_get(url, params=params, headers=headers, kwargs) File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 330, in wrapped_f return self(f, *args, *kw) File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 467, in call do = self.iter(retry_state=retry_state) File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 368, in iter result = action(retry_state) File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 410, in exc_check raise retry_exc.reraise() File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 183, in reraise raise self.last_attempt.result() File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result return self.get_result() File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in get_result raise self._exception File "/home/username/.local/lib/python3.10/site-packages/tenacity/init.py", line 470, in call result = fn(args, kwargs) File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 130, in retrying_get return self._do_get(url, params=params, headers=headers, kwargs) File "/home/username/.local/lib/python3.10/site-packages/forum_dl/session.py", line 164, in _do_get return self._session.get( File 
"/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 602, in get return self.request("GET", url, kwargs) File "/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 589, in request resp = self.send(prep, send_kwargs) File "/home/username/.local/lib/python3.10/site-packages/requests/sessions.py", line 703, in send r = adapter.send(request, kwargs) File "/home/username/.local/lib/python3.10/site-packages/requests/adapters.py", line 622, in send requests.exceptions.ConnectionError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: [redacted] (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f03ae62f550>: Failed to establish a new connection: [Errno 24] Too many open files'))