Lareina2441 opened this issue 1 month ago
export HF_ENDPOINT=https://hf-mirror.com
deepspeed --include localhost:0,5 /home/louey/LLaVA/llava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed /home/louey/LLaVA/scripts/zero3.json \
    --model_name_or_path liuhaotian/llava-v1.5-13b \
    --version v1 \
    --data_path /home/louey/LLaVA/data/train/dataset.json \
    --image_folder /home/louey/LLaVA/data/images \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir /home/louey/LLaVA/checkpoints/llava-v1.5-13b-task-lora_1 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb
python /home/louey/LLaVA/scripts/merge_lora_weights.py --model-path /home/louey/LLaVA/checkpoints/llava-v1.5-13b-task-lora_1 --model-base liuhaotian/llava-v1.5-13b --save-model-path /home/louey/LLaVA/fted/llava-ftmodel_1
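Before deploying, it is worth confirming that the merge step actually wrote a loadable checkpoint; a minimal check (the path is the --save-model-path from the command above):

ls /home/louey/LLaVA/fted/llava-ftmodel_1
# expect config.json, the tokenizer files, and the model weight shards in the listing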
chmod +x llava-deploy.sh && ./llava-deploy.sh
(base) [louey@gpu01 LLaVA]$ chmod +x llava-deploy.sh && ./llava-deploy.sh
[2024-10-11 22:03:38,827] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:03:42 | INFO | controller | args: Namespace(host='0.0.0.0', port=10000, dispatch_method='shortest_queue')
2024-10-11 22:03:42 | INFO | controller | Init controller
2024-10-11 22:03:42 | ERROR | stderr | INFO: Started server process [91287]
2024-10-11 22:03:42 | ERROR | stderr | INFO: Waiting for application startup.
2024-10-11 22:03:42 | ERROR | stderr | INFO: Application startup complete.
2024-10-11 22:03:42 | ERROR | stderr | INFO: Uvicorn running on http://0.0.0.0:10000 (Press CTRL+C to quit)
[2024-10-11 22:03:48,670] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:03:55 | INFO | gradio_web_server | args: Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:03:55 | INFO | stdout | INFO: 127.0.0.1:45250 - "POST /refresh_all_workers HTTP/1.1" 200 OK
2024-10-11 22:03:55 | INFO | stdout | INFO: 127.0.0.1:45252 - "POST /list_models HTTP/1.1" 200 OK
2024-10-11 22:03:55 | INFO | gradio_web_server | Models: []
2024-10-11 22:03:55 | INFO | gradio_web_server | Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:03:55 | ERROR | stderr | /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/components/dropdown.py:163: UserWarning: The value passed into gr.Dropdown() is not in the list of choices. Please update the list of choices to include: or set allow_custom_value=True.
2024-10-11 22:03:55 | ERROR | stderr | warnings.warn(
2024-10-11 22:03:55 | INFO | stdout | Running on local URL: http://0.0.0.0:7860
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET http://localhost:7860/startup-events "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | stdout | IMPORTANT: You are using gradio version 4.16.0, however version 5.0.1 is available, please upgrade.
2024-10-11 22:03:56 | INFO | stdout | --------
2024-10-11 22:03:57 | INFO | httpx | HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-10-11 22:03:57 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-initiated-analytics/ "HTTP/1.1 200 OK"
[2024-10-11 22:03:58,630] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:04:02 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=40000, worker_address='http://localhost:40000', controller_address='http://localhost:10000', model_path='llava-ftmodel', model_base=None, model_name=None, device='cuda', multi_modal=False, limit_model_concurrency=5, stream_interval=1, no_register=False, load_8bit=False, load_4bit=False, use_flash_attn=False)
2024-10-11 22:04:02 | INFO | model_worker | Loading the model llava-ftmodel on worker 496f1d ...
2024-10-11 22:04:03 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
2024-10-11 22:04:03 | ERROR | stderr | response.raise_for_status()
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
2024-10-11 22:04:03 | ERROR | stderr | raise HTTPError(http_error_msg, response=self)
2024-10-11 22:04:03 | ERROR | stderr | requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json
2024-10-11 22:04:03 | ERROR | stderr |
2024-10-11 22:04:03 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:04:03 | ERROR | stderr |
2024-10-11 22:04:03 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1261, in hf_hub_download
2024-10-11 22:04:03 | ERROR | stderr | metadata = get_hf_file_metadata(
2024-10-11 22:04:03 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:04:03 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:04:03 | ERROR | stderr |
http://192.168.51.15:7860
2024-10-11 22:23:26 | INFO | gradio_web_server | Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:23:26 | ERROR | stderr | /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/components/dropdown.py:163: UserWarning: The value passed into gr.Dropdown() is not in the list of choices. Please update the list of choices to include: or set allow_custom_value=True.
2024-10-11 22:23:26 | ERROR | stderr | warnings.warn(
2024-10-11 22:23:26 | INFO | stdout | Running on local URL: http://0.0.0.0:7861
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: GET http://localhost:7861/startup-events "HTTP/1.1 200 OK"
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: HEAD http://localhost:7861/ "HTTP/1.1 200 OK"
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2024-10-11 22:23:27 | INFO | stdout | IMPORTANT: You are using gradio version 4.16.0, however version 5.0.1 is available, please upgrade.
2024-10-11 22:23:27 | INFO | stdout | --------
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-initiated-analytics/ "HTTP/1.1 200 OK"
[2024-10-11 22:23:30,359] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:23:34 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=40000, worker_address='http://localhost:40000', controller_address='http://localhost:10000', model_path='llava-ftmodel', model_base=None, model_name=None, device='cuda', multi_modal=False, limit_model_concurrency=5, stream_interval=1, no_register=False, load_8bit=False, load_4bit=False, use_flash_attn=False)
2024-10-11 22:23:34 | INFO | model_worker | Loading the model llava-ftmodel on worker fc0d3f ...
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | response.raise_for_status()
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | raise HTTPError(http_error_msg, response=self)
2024-10-11 22:23:35 | ERROR | stderr | requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1261, in hf_hub_download
2024-10-11 22:23:35 | ERROR | stderr | metadata = get_hf_file_metadata(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:23:35 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1667, in get_hf_file_metadata
2024-10-11 22:23:35 | ERROR | stderr | r = _request_wrapper(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 385, in _request_wrapper
2024-10-11 22:23:35 | ERROR | stderr | response = _request_wrapper(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 409, in _request_wrapper
2024-10-11 22:23:35 | ERROR | stderr | hf_raise_for_status(response)
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 362, in hf_raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | raise HfHubHTTPError(str(e), response=response) from e
2024-10-11 22:23:35 | ERROR | stderr | huggingface_hub.utils._errors.HfHubHTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json (Request ID: Root=1-6709365c-088985bc75b9d19a30c9f399;e5942ae6-5cd2-4bd6-a9ca-ed487cd3cc7a)
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Invalid username or password.
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/utils/hub.py", line 385, in cached_file
2024-10-11 22:23:35 | ERROR | stderr | resolved_file = hf_hub_download(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:23:35 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1406, in hf_hub_download
2024-10-11 22:23:35 | ERROR | stderr | raise LocalEntryNotFoundError(
2024-10-11 22:23:35 | ERROR | stderr | huggingface_hub.utils._errors.LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "
^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/home/louey/LLaVA/llava/model/builder.py", line 116, in load_pretrained_model
2024-10-11 22:23:35 | ERROR | stderr | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 773, in from_pretrained
2024-10-11 22:23:35 | ERROR | stderr | config = AutoConfig.from_pretrained(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1100, in from_pretrained
2024-10-11 22:23:35 | ERROR | stderr | config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path,
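The repeated 401 from hf-mirror.com suggests that llava-ftmodel is being resolved as a Hub repo id rather than a local directory: the name contains no path separator and no such directory exists relative to the working directory, so transformers falls back to downloading from HF_ENDPOINT. A quick pre-flight check (a sketch; the merged-checkpoint path is the one from the merge step above):

MODEL_DIR=/home/louey/LLaVA/fted/llava-ftmodel_1
if [ -f "$MODEL_DIR/config.json" ]; then
  echo "local checkpoint found: $MODEL_DIR"   # pass this absolute path to --model-path
else
  echo "no $MODEL_DIR/config.json -- the worker will try the Hub instead" >&2
fi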
Launch a controller:
python -m llava.serve.controller --host 0.0.0.0 --port 10000
Launch a gradio web server:
python -m llava.serve.gradio_web_server --controller http://localhost:10000 --model-list-mode reload
You just launched the Gradio web interface. Now, you can open the web interface with the URL printed on the screen. You may notice that there is no model in the model list. Do not worry, as we have not launched any model worker yet; the list will be automatically updated when you launch one.
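Before launching any workers, the controller can also be checked directly; its POST /list_models endpoint is the same one the web server hits in the logs above (a sketch):

curl -X POST http://localhost:10000/list_models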
Launch a SGLang worker
This is the recommended way to serve LLaVA models with high throughput, and you need to install SGLang first. Note that 4-bit quantization is not supported yet on SGLang-LLaVA; if you have limited GPU VRAM, check out the model worker with quantization.
pip install "sglang[all]"
You will first launch a SGLang backend worker, which will execute the model on the GPU. Remember the --port you have set; you will use it later.
CUDA_VISIBLE_DEVICES=0 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000
CUDA_VISIBLE_DEVICES=0,1 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-13b --tokenizer-path llava-hf/llava-1.5-13b-hf --port 30000 --tp 2
Tokenizers (temporary): llava-hf/llava-1.5-7b-hf, llava-hf/llava-1.5-13b-hf, liuhaotian/llava-v1.6-34b-tokenizer.
You will then launch a LLaVA-SGLang worker that communicates between the LLaVA controller and the SGLang backend to route requests. Set --sgl-endpoint to http://127.0.0.1:port, where port is the one you just set (default: 30000).
python -m llava.serve.sglang_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --sgl-endpoint http://127.0.0.1:30000
Launch a model worker
This is the actual worker that performs the inference on the GPU. Each worker is responsible for a single model specified in --model-path.
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b
Wait until the process finishes loading the model and you see "Uvicorn running on ...". Now, refresh the Gradio web UI, and you will see the model you just launched in the model list.
You can launch as many workers as you want, and compare different model checkpoints in the same Gradio interface. Please keep --controller the same, and modify --port and --worker to a different port number for each worker.
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port <different from 40000, say 40001> --worker http://localhost:<change accordingly, i.e. 40001> --model-path <ckpt2>
Launch a model worker (multiple GPUs, when GPU VRAM <= 24GB)
If the VRAM of your GPU is less than 24GB (e.g., RTX 3090, RTX 4090, etc.), you may try running it with multiple GPUs. The latest code base will automatically try to use multiple GPUs if more than one is available. You can specify which GPUs to use with CUDA_VISIBLE_DEVICES. Below is an example of running with the first two GPUs.
CUDA_VISIBLE_DEVICES=0,1 python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b
Launch a model worker (4-bit, 8-bit inference, quantized)
You can launch the model worker with quantized bits (4-bit, 8-bit), which allows you to run inference with a reduced GPU memory footprint, potentially on a GPU with as little as 12GB VRAM. Note that inference with quantized bits may not be as accurate as with the full-precision model. Simply append --load-4bit or --load-8bit to the model worker command you are executing. Below is an example of running with 4-bit quantization.
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b --load-4bit
Launch a model worker (LoRA weights, unmerged)
You can launch the model worker with LoRA weights, without merging them with the base checkpoint, to save disk space. There will be additional loading time, while the inference speed is the same as with merged checkpoints. Unmerged LoRA checkpoints do not have lora-merge in the model name, and are usually much smaller (less than 1GB) than the merged checkpoints (13G for 7B, and 25G for 13B).
To load unmerged LoRA weights, you simply need to pass an additional argument --model-base, which is the base LLM that was used to train the LoRA weights. You can check the base LLM of each LoRA checkpoint in the model zoo.
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1-0719-336px-lora-vicuna-13b-v1.3 --model-base lmsys/vicuna-13b-v1.3
CLI Inference
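For reference, the CLI inference example from the LLaVA README (reproduced from memory, so treat the exact flags as a sketch) runs a single-image chat without the controller/web-server stack:

python -m llava.serve.cli --model-path liuhaotian/llava-v1.5-7b --image-file "https://llava-vl.github.io/static/images/view.jpg" --load-4bit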
To manually download the frpc_linux_amd64 file and place it in the required directory, follow these steps:
Download the file with:
wget https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64
After the download completes, rename the file to frpc_linux_amd64_v0.2:
mv frpc_linux_amd64 frpc_linux_amd64_v0.2
Move the file to the directory where the gradio package is installed, i.e. /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio:
sudo mv frpc_linux_amd64_v0.2 /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/
Make sure you have permission to access and write to that directory; if permissions are insufficient, use sudo to elevate them.
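Depending on the gradio version, the binary may also need to be executable; adding the bit is harmless in any case (an assumption, not part of the original steps):

sudo chmod +x /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/frpc_linux_amd64_v0.2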
Check that the file has been moved to the directory:
ls /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/
The file frpc_linux_amd64_v0.2 should appear in the listing.
After completing these steps, rerun the program to see whether the problem is resolved.
python -m llava.serve.controller --host 0.0.0.0 --port 10000 & sleep 10 # Wait longer for the controller to start
python -m llava.serve.gradio_web_server --controller http://0.0.0.0:10000 --model-list-mode reload --share & sleep 10 # Wait longer for the web server to start
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path llava-ftmodel & sleep 10 # Allow more time for the first worker to initialize
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40001 --worker http://localhost:40001 --model-path liuhaotian/llava-v1.5-13b &
wait
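As diagnosed above, the bare llava-ftmodel in the first worker line is what sends the loader to hf-mirror.com; pointing it at the absolute merged-checkpoint directory avoids the Hub lookup entirely (a sketch, assuming the merge output path from earlier):

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path /home/louey/LLaVA/fted/llava-ftmodel_1 & sleep 10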
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path /home/louey/LLaVA/fted/llava-ftmodel_1
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llava 1.2.2.post1 requires gradio==4.16.0, but you have gradio 4.42.0 which is incompatible.
llava 1.2.2.post1 requires gradio-client==0.8.1, but you have gradio-client 1.3.0 which is incompatible.
llava 1.2.2.post1 requires httpx==0.24.0, but you have httpx 0.27.2 which is incompatible.
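The message itself names the versions pinned by llava 1.2.2.post1, so reinstalling exactly those should clear the conflict (whether the demo also works with the newer gradio is untested here):

pip install gradio==4.16.0 gradio-client==0.8.1 httpx==0.24.0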
return self.fget.__get__(instance, owner)()
Traceback (most recent call last):
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 791, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 492, in _make_request
    raise new_e
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 468, in _make_request
    self._validate_conn(conn)
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1097, in _validate_conn
    conn.connect()
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 642, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 783, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 471, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 515, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 517, in wrap_socket
    return self.sslsocket_class._create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1108, in _create
    self.do_handshake()
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1383, in do_handshake
    self._sslobj.do_handshake()
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
    resp = conn.urlopen(
           ^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 845, in urlopen
    retries = retries.increment(
              ^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/retry.py", line 470, in increment
    raise reraise(type(error), error, _stacktrace)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/util.py", line 38, in reraise
    raise value.with_traceback(tb)
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 791, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 492, in _make_request
    raise new_e
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 468, in _make_request
    self._validate_conn(conn)
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1097, in _validate_conn
    conn.connect()
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 642, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 783, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 471, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 515, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 517, in wrap_socket
    return self.sslsocket_class._create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1108, in _create
    self.do_handshake()
  File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1383, in do_handshake
    self._sslobj.do_handshake()
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "
CUDA_VISIBLE_DEVICES=0,1,3,5,6,7 python /home/louey/LLaVA/model_vqa.py \
    --model-path /home/louey/LLaVA/fted/llava-ftmodel_1 \
    --image-folder /home/louey/LLaVA/data/images \
    --question-file /home/louey/LLaVA/data/validation/question.jsonl \
    --answers-file /home/louey/LLaVA/data/validation/answer1.jsonl \
    --conv-mode llava_v1 \
    --num-chunks 1 \
    --chunk-idx 0 \
    --temperature 0.2 \
    --top_p 0.9 \
    --num_beams 5
CUDA_VISIBLE_DEVICES=5,6,7 python /home/louey/LLaVA/model_vqa.py \
    --model-path /home/louey/LLaVA/fted/llava-ftmodel_1 \
    --image-folder /home/louey/LLaVA/data/images \
    --question-file /home/louey/LLaVA/data/validation/question.jsonl \
    --answers-file /home/louey/LLaVA/data/validation/answer1.jsonl \
    --conv-mode llava_v1 \
    --num-chunks 1 \
    --chunk-idx 0 \
    --temperature 0.2 \
    --top_p 0.9 \
    --num_beams 5
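Since model_vqa.py already exposes --num-chunks/--chunk-idx, the evaluation can be parallelized with one process and one question chunk per GPU; a sketch (the _chunk answer-file names are made up here, and the chunks are concatenated at the end):

for IDX in 0 1 2; do
  CUDA_VISIBLE_DEVICES=$((5 + IDX)) python /home/louey/LLaVA/model_vqa.py \
    --model-path /home/louey/LLaVA/fted/llava-ftmodel_1 \
    --image-folder /home/louey/LLaVA/data/images \
    --question-file /home/louey/LLaVA/data/validation/question.jsonl \
    --answers-file /home/louey/LLaVA/data/validation/answer1_chunk${IDX}.jsonl \
    --conv-mode llava_v1 --num-chunks 3 --chunk-idx $IDX \
    --temperature 0.2 --top_p 0.9 --num_beams 5 &
done
wait
cat /home/louey/LLaVA/data/validation/answer1_chunk*.jsonl > /home/louey/LLaVA/data/validation/answer1.jsonl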
ssh -L8888:localhost:8888 louey@192.168.51.15
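Note that the Gradio UI in the logs above listens on 7860/7861, not 8888, so to reach it through the SSH tunnel that port has to be forwarded instead (a sketch):

ssh -L 7860:localhost:7860 louey@192.168.51.15
# then open http://localhost:7860 in the local browser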