arcee-ai / mergekit

Tools for merging pretrained large language models.
GNU Lesser General Public License v3.0
4.88k stars 446 forks source link

Network is unreachable #375

Closed guanfaqian closed 4 months ago

guanfaqian commented 4 months ago

Warmup loader cache: 100%|██████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 2760.50it/s] Executing graph: 100%|██████████████████████████████████████████████████████████████████████████| 2330/2330 [02:57<00:00, 13.10it/s]


OSError Traceback (most recent call last) File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connection.py:198, in HTTPConnection._new_conn(self) 197 try: --> 198 sock = connection.create_connection( 199 (self._dns_host, self.port), 200 self.timeout, 201 source_address=self.source_address, 202 socket_options=self.socket_options, 203 ) 204 except socket.gaierror as e:

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/util/connection.py:85, in create_connection(address, timeout, source_address, socket_options) 84 try: ---> 85 raise err 86 finally: 87 # Break explicitly a reference cycle

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/util/connection.py:73, in create_connection(address, timeout, source_address, socket_options) 72 sock.bind(source_address) ---> 73 sock.connect(sa) 74 # Break explicitly a reference cycle

OSError: [Errno 101] Network is unreachable

The above exception was the direct cause of the following exception:

NewConnectionError Traceback (most recent call last) File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connectionpool.py:793, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw) 792 # Make the request on the HTTPConnection object --> 793 response = self._make_request( 794 conn, 795 method, 796 url, 797 timeout=timeout_obj, 798 body=body, 799 headers=headers, 800 chunked=chunked, 801 retries=retries, 802 response_conn=response_conn, 803 preload_content=preload_content, 804 decode_content=decode_content, 805 **response_kw, 806 ) 808 # Everything went great!

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connectionpool.py:491, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length) 490 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) --> 491 raise new_e 493 # conn.request() calls http.client.*.request, not the method in 494 # urllib3.request. It also calls makefile (recv) on the socket.

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connectionpool.py:467, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length) 466 try: --> 467 self._validate_conn(conn) 468 except (SocketTimeout, BaseSSLError) as e:

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connectionpool.py:1099, in HTTPSConnectionPool._validate_conn(self, conn) 1098 if conn.is_closed: -> 1099 conn.connect() 1101 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connection.py:616, in HTTPSConnection.connect(self) 615 sock: socket.socket | ssl.SSLSocket --> 616 self.sock = sock = self._new_conn() 617 server_hostname: str = self.host

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connection.py:213, in HTTPConnection._new_conn(self) 212 except OSError as e: --> 213 raise NewConnectionError( 214 self, f"Failed to establish a new connection: {e}" 215 ) from e 217 # Audit hooks are only available in Python 3.8+

NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7a0c6e97b910>: Failed to establish a new connection: [Errno 101] Network is unreachable

The above exception was the direct cause of the following exception:

MaxRetryError Traceback (most recent call last) File ~/anaconda3/envs/merge/lib/python3.10/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 485 try: --> 486 resp = conn.urlopen( 487 method=request.method, 488 url=url, 489 body=request.body, 490 headers=request.headers, 491 redirect=False, 492 assert_same_host=False, 493 preload_content=False, 494 decode_content=False, 495 retries=self.max_retries, 496 timeout=timeout, 497 chunked=chunked, 498 ) 500 except (ProtocolError, OSError) as err:

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/connectionpool.py:847, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw) 845 new_e = ProtocolError("Connection aborted.", new_e) --> 847 retries = retries.increment( 848 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2] 849 ) 850 retries.sleep()

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/urllib3/util/retry.py:515, in Retry.increment(self, method, url, response, error, _pool, _stacktrace) 514 reason = error or ResponseError(cause) --> 515 raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] 517 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/models/vicuna_7b_Ogbn_arxiv (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7a0c6e97b910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

During handling of the above exception, another exception occurred:

ConnectionError Traceback (most recent call last) Cell In[2], line 11 8 with open(CONFIG_YML, "r", encoding="utf-8") as fp: 9 merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp)) ---> 11 run_merge( 12 merge_config, 13 out_path=OUTPUT_PATH, 14 options=MergeOptions( 15 lora_merge_cache=LORA_MERGE_CACHE, 16 cuda=torch.cuda.is_available(), 17 copy_tokenizer=COPY_TOKENIZER, 18 lazy_unpickle=LAZY_UNPICKLE, 19 low_cpu_memory=LOW_CPU_MEMORY, 20 ), 21 ) 22 print("Done!")

File /mnt/drive0/gfq/mergekit/mergekit/merge.py:110, in run_merge(merge_config, out_path, options, config_source) 107 if not config_source: 108 config_source = merge_config.to_yaml() --> 110 card_md = generate_card( 111 config=merge_config, 112 config_yaml=config_source, 113 name=os.path.basename(out_path), 114 ) 115 with open(os.path.join(out_path, "README.md"), "w", encoding="utf-8") as fp: 116 fp.write(card_md)

File /mnt/drive0/gfq/mergekit/mergekit/card.py:169, in generate_card(config, config_yaml, name) 166 if not name: 167 name = "Untitled Model (1)" --> 169 hf_bases = list(extract_hf_paths(config.referenced_models())) 170 tags = ["mergekit", "merge"] 172 actual_base = config.base_model

File /mnt/drive0/gfq/mergekit/mergekit/card.py:99, in extract_hf_paths(models) 92 """ 93 Yields all valid Hugging Face paths from a list of ModelReference objects. 94 95 Args: 96 models: A list of ModelReference objects. 97 """ 98 for model in models: ---> 99 if is_hf(model.model.path): 100 yield model.model.path 102 if model.lora and is_hf(model.lora.path):

File /mnt/drive0/gfq/mergekit/mergekit/card.py:86, in is_hf(path) 84 return True # If path doesn't exist locally, it must be a HF repo 85 try: ---> 86 return huggingface_hub.repo_exists(path, repo_type="model", token=False) 87 except HFValidationError: 88 return False

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs) 111 if check_use_auth_token: 112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs) --> 114 return fn(*args, **kwargs)

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/hf_api.py:2536, in HfApi.repo_exists(self, repo_id, repo_type, token) 2507 """ 2508 Checks if a repository exists on the Hugging Face Hub. 2509 (...) 2533 ``` 2534 """ 2535 try: -> 2536 self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) 2537 return True 2538 except GatedRepoError:

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs) 111 if check_use_auth_token: 112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs) --> 114 return fn(*args, **kwargs)

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/hf_api.py:2491, in HfApi.repo_info(self, repo_id, revision, repo_type, timeout, files_metadata, token) 2489 else: 2490 raise ValueError("Unsupported repo type.") -> 2491 return method( 2492 repo_id, 2493 revision=revision, 2494 token=token, 2495 timeout=timeout, 2496 files_metadata=files_metadata, 2497 )

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs) 111 if check_use_auth_token: 112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs) --> 114 return fn(*args, **kwargs)

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/hf_api.py:2300, in HfApi.model_info(self, repo_id, revision, timeout, securityStatus, files_metadata, token) 2298 if files_metadata: 2299 params["blobs"] = True -> 2300 r = get_session().get(path, headers=headers, timeout=timeout, params=params) 2301 hf_raise_for_status(r) 2302 data = r.json()

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/requests/sessions.py:602, in Session.get(self, url, **kwargs) 594 r"""Sends a GET request. Returns :class:`Response` object. 595 596 :param url: URL for the new :class:`Request` object. 597 :param **kwargs: Optional arguments that ``request`` takes. 598 :rtype: requests.Response 599 """ 601 kwargs.setdefault("allow_redirects", True) --> 602 return self.request("GET", url, **kwargs)

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 584 send_kwargs = { 585 "timeout": timeout, 586 "allow_redirects": allow_redirects, 587 } 588 send_kwargs.update(settings) --> 589 resp = self.send(prep, **send_kwargs) 591 return resp

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs) 700 start = preferred_clock() 702 # Send the request --> 703 r = adapter.send(request, **kwargs) 705 # Total elapsed time of the request (approximately) 706 elapsed = preferred_clock() - start

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:66, in UniqueRequestIdAdapter.send(self, request, *args, **kwargs) 64 """Catch any RequestException to append request id to the error message for debugging.""" 65 try: ---> 66 return super().send(request, *args, **kwargs) 67 except requests.RequestException as e: 68 request_id = request.headers.get(X_AMZN_TRACE_ID)

File ~/anaconda3/envs/merge/lib/python3.10/site-packages/requests/adapters.py:519, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 515 if isinstance(e.reason, _SSLError): 516 # This branch is for urllib3 v1.22 and later. 517 raise SSLError(e, request=request) --> 519 raise ConnectionError(e, request=request) 521 except ClosedPoolError as e: 522 raise ConnectionError(e, request=request)

ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/models/vicuna_7b_Ogbn_arxiv (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7a0c6e97b910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: ef56227c-a9b5-4450-b762-414f07d2b675)')

metric-space commented 4 months ago

This is very likely due to the recent Hugging Face service outage. Closing this issue, as it is not related to mergekit and is a network problem rather than a bug.