Lareina2441 / LLaVA-Med

Large Language-and-Vision Assistant for BioMedicine, built towards multimodal GPT-4 level capabilities.

13141234 #4

Open Lareina2441 opened 1 day ago

Lareina2441 commented 21 hours ago

ssh -L8888:localhost:8888 louey@192.168.51.15
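(Aside: the same -L pattern can also forward the Gradio port reported later in this thread, 7860, if the UI turns out to be reachable only on localhost — a hedged example:)

ssh -L 7860:localhost:7860 louey@192.168.51.15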

Lareina2441 commented 21 hours ago

export HF_ENDPOINT=https://hf-mirror.com
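A quick way to confirm the mirror is reachable before kicking off large downloads (a hedged check; liuhaotian/llava-v1.5-13b is just the model used below):

# expect an HTTP 200 or a redirect status on the first line
curl -sI https://hf-mirror.com/liuhaotian/llava-v1.5-13b/resolve/main/config.json | head -n 1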

Lareina2441 commented 21 hours ago

deepspeed --include localhost:0,5 /home/louey/LLaVA/llava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed /home/louey/LLaVA/scripts/zero3.json \
    --model_name_or_path liuhaotian/llava-v1.5-13b \
    --version v1 \
    --data_path /home/louey/LLaVA/data/train/dataset.json \
    --image_folder /home/louey/LLaVA/data/images \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir /home/louey/LLaVA/checkpoints/llava-v1.5-13b-task-lora_1 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb
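For reference, a hedged sketch of the record format that --data_path expects in upstream LLaVA (field names follow the LLaVA training format; the actual dataset.json used here is not shown in this thread):

[
  {
    "id": "sample-0001",
    "image": "sample-0001.jpg",
    "conversations": [
      {"from": "human", "value": "<image>\nDescribe the finding in this scan."},
      {"from": "gpt", "value": "..."}
    ]
  }
]

The "image" field is resolved relative to --image_folder.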

Lareina2441 commented 21 hours ago

python /home/louey/LLaVA/scripts/merge_lora_weights.py --model-path /home/louey/LLaVA/checkpoints/llava-v1.5-13b-task-lora_1 --model-base liuhaotian/llava-v1.5-13b --save-model-path /home/louey/LLaVA/fted/llava-ftmodel_1
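A quick hedged sanity check after merging — the model worker later needs a standalone checkpoint here, so the directory should contain config.json, tokenizer files, and the model weight shards:

ls /home/louey/LLaVA/fted/llava-ftmodel_1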

Lareina2441 commented 17 hours ago

wget -L https://raw.githubusercontent.com/brevdev/notebooks/main/assets/llava-deploy.sh

Lareina2441 commented 17 hours ago

chmod +x llava-deploy.sh && ./llava-deploy.sh

Lareina2441 commented 17 hours ago

https://blog.csdn.net/TYUT_xiaoming/article/details/135935184

Lareina2441 commented 17 hours ago

(base) [louey@gpu01 LLaVA]$ chmod +x llava-deploy.sh && ./llava-deploy.sh
[2024-10-11 22:03:38,827] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:03:42 | INFO | controller | args: Namespace(host='0.0.0.0', port=10000, dispatch_method='shortest_queue')
2024-10-11 22:03:42 | INFO | controller | Init controller
2024-10-11 22:03:42 | ERROR | stderr | INFO: Started server process [91287]
2024-10-11 22:03:42 | ERROR | stderr | INFO: Waiting for application startup.
2024-10-11 22:03:42 | ERROR | stderr | INFO: Application startup complete.
2024-10-11 22:03:42 | ERROR | stderr | INFO: Uvicorn running on http://0.0.0.0:10000 (Press CTRL+C to quit)
[2024-10-11 22:03:48,670] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:03:55 | INFO | gradio_web_server | args: Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:03:55 | INFO | stdout | INFO: 127.0.0.1:45250 - "POST /refresh_all_workers HTTP/1.1" 200 OK
2024-10-11 22:03:55 | INFO | stdout | INFO: 127.0.0.1:45252 - "POST /list_models HTTP/1.1" 200 OK
2024-10-11 22:03:55 | INFO | gradio_web_server | Models: []
2024-10-11 22:03:55 | INFO | gradio_web_server | Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:03:55 | ERROR | stderr | /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/components/dropdown.py:163: UserWarning: The value passed into gr.Dropdown() is not in the list of choices. Please update the list of choices to include: or set allow_custom_value=True.
2024-10-11 22:03:55 | ERROR | stderr | warnings.warn(
2024-10-11 22:03:55 | INFO | stdout | Running on local URL: http://0.0.0.0:7860
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET http://localhost:7860/startup-events "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
2024-10-11 22:03:56 | INFO | httpx | HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2024-10-11 22:03:56 | INFO | stdout | IMPORTANT: You are using gradio version 4.16.0, however version 5.0.1 is available, please upgrade.
2024-10-11 22:03:56 | INFO | stdout | --------
2024-10-11 22:03:57 | INFO | httpx | HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-10-11 22:03:57 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-initiated-analytics/ "HTTP/1.1 200 OK"
[2024-10-11 22:03:58,630] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:04:02 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=40000, worker_address='http://localhost:40000', controller_address='http://localhost:10000', model_path='llava-ftmodel', model_base=None, model_name=None, device='cuda', multi_modal=False, limit_model_concurrency=5, stream_interval=1, no_register=False, load_8bit=False, load_4bit=False, use_flash_attn=False)
2024-10-11 22:04:02 | INFO | model_worker | Loading the model llava-ftmodel on worker 496f1d ...
2024-10-11 22:04:03 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
2024-10-11 22:04:03 | ERROR | stderr | response.raise_for_status()
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
2024-10-11 22:04:03 | ERROR | stderr | raise HTTPError(http_error_msg, response=self)
2024-10-11 22:04:03 | ERROR | stderr | requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json
2024-10-11 22:04:03 | ERROR | stderr |
2024-10-11 22:04:03 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:04:03 | ERROR | stderr |
2024-10-11 22:04:03 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1261, in hf_hub_download
2024-10-11 22:04:03 | ERROR | stderr | metadata = get_hf_file_metadata(
2024-10-11 22:04:03 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:04:03 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:04:03 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:04:03 | ERROR | stderr |

Lareina2441 commented 17 hours ago

http://192.168.51.15:7860

Lareina2441 commented 17 hours ago

2024-10-11 22:23:26 | INFO | gradio_web_server | Namespace(host='0.0.0.0', port=None, controller_url='http://0.0.0.0:10000', concurrency_count=16, model_list_mode='reload', share=True, moderate=False, embed=False)
2024-10-11 22:23:26 | ERROR | stderr | /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/components/dropdown.py:163: UserWarning: The value passed into gr.Dropdown() is not in the list of choices. Please update the list of choices to include: or set allow_custom_value=True.
2024-10-11 22:23:26 | ERROR | stderr | warnings.warn(
2024-10-11 22:23:26 | INFO | stdout | Running on local URL: http://0.0.0.0:7861
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: GET http://localhost:7861/startup-events "HTTP/1.1 200 OK"
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: HEAD http://localhost:7861/ "HTTP/1.1 200 OK"
2024-10-11 22:23:26 | INFO | httpx | HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2024-10-11 22:23:27 | INFO | stdout | IMPORTANT: You are using gradio version 4.16.0, however version 5.0.1 is available, please upgrade.
2024-10-11 22:23:27 | INFO | stdout | --------
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-10-11 22:23:27 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-initiated-analytics/ "HTTP/1.1 200 OK"
[2024-10-11 22:23:30,359] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:23:34 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=40000, worker_address='http://localhost:40000', controller_address='http://localhost:10000', model_path='llava-ftmodel', model_base=None, model_name=None, device='cuda', multi_modal=False, limit_model_concurrency=5, stream_interval=1, no_register=False, load_8bit=False, load_4bit=False, use_flash_attn=False)
2024-10-11 22:23:34 | INFO | model_worker | Loading the model llava-ftmodel on worker fc0d3f ...
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | response.raise_for_status()
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | raise HTTPError(http_error_msg, response=self)
2024-10-11 22:23:35 | ERROR | stderr | requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1261, in hf_hub_download
2024-10-11 22:23:35 | ERROR | stderr | metadata = get_hf_file_metadata(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:23:35 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1667, in get_hf_file_metadata
2024-10-11 22:23:35 | ERROR | stderr | r = _request_wrapper(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 385, in _request_wrapper
2024-10-11 22:23:35 | ERROR | stderr | response = _request_wrapper(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 409, in _request_wrapper
2024-10-11 22:23:35 | ERROR | stderr | hf_raise_for_status(response)
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 362, in hf_raise_for_status
2024-10-11 22:23:35 | ERROR | stderr | raise HfHubHTTPError(str(e), response=response) from e
2024-10-11 22:23:35 | ERROR | stderr | huggingface_hub.utils._errors.HfHubHTTPError: 401 Client Error: Unauthorized for url: https://hf-mirror.com/llava-ftmodel/resolve/main/config.json (Request ID: Root=1-6709365c-088985bc75b9d19a30c9f399;e5942ae6-5cd2-4bd6-a9ca-ed487cd3cc7a)
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Invalid username or password.
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/utils/hub.py", line 385, in cached_file
2024-10-11 22:23:35 | ERROR | stderr | resolved_file = hf_hub_download(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
2024-10-11 22:23:35 | ERROR | stderr | return fn(*args, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1406, in hf_hub_download
2024-10-11 22:23:35 | ERROR | stderr | raise LocalEntryNotFoundError(
2024-10-11 22:23:35 | ERROR | stderr | huggingface_hub.utils._errors.LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | The above exception was the direct cause of the following exception:
2024-10-11 22:23:35 | ERROR | stderr |
2024-10-11 22:23:35 | ERROR | stderr | Traceback (most recent call last):
2024-10-11 22:23:35 | ERROR | stderr | File "<frozen runpy>", line 198, in _run_module_as_main
2024-10-11 22:23:35 | ERROR | stderr | File "<frozen runpy>", line 88, in _run_code
2024-10-11 22:23:35 | ERROR | stderr | File "/home/louey/LLaVA/llava/serve/model_worker.py", line 277, in <module>
2024-10-11 22:23:35 | ERROR | stderr | worker = ModelWorker(args.controller_address,
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/home/louey/LLaVA/llava/serve/model_worker.py", line 65, in __init__
2024-10-11 22:23:35 | ERROR | stderr | self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/home/louey/LLaVA/llava/model/builder.py", line 116, in load_pretrained_model
2024-10-11 22:23:35 | ERROR | stderr | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 773, in from_pretrained
2024-10-11 22:23:35 | ERROR | stderr | config = AutoConfig.from_pretrained(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1100, in from_pretrained
2024-10-11 22:23:35 | ERROR | stderr | config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/configuration_utils.py", line 634, in get_config_dict
2024-10-11 22:23:35 | ERROR | stderr | config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/configuration_utils.py", line 689, in _get_config_dict
2024-10-11 22:23:35 | ERROR | stderr | resolved_config_file = cached_file(
2024-10-11 22:23:35 | ERROR | stderr | ^^^^^^^^^^^^
2024-10-11 22:23:35 | ERROR | stderr | File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/transformers/utils/hub.py", line 425, in cached_file
2024-10-11 22:23:35 | ERROR | stderr | raise EnvironmentError(
2024-10-11 22:23:35 | ERROR | stderr | OSError: We couldn't connect to 'https://hf-mirror.com' to load this file, couldn't find it in the cached files and it looks like llava-ftmodel is not the path to a directory containing a file named config.json.
2024-10-11 22:23:35 | ERROR | stderr | Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.
[2024-10-11 22:23:40,332] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
2024-10-11 22:23:44 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=40001, worker_address='http://localhost:40001', controller_address='http://localhost:10000', model_path='liuhaotian/llava-v1.5-13b', model_base=None, model_name=None, device='cuda', multi_modal=False, limit_model_concurrency=5, stream_interval=1, no_register=False, load_8bit=False, load_4bit=False, use_flash_attn=False)
2024-10-11 22:23:44 | INFO | model_worker | Loading the model llava-v1.5-13b on worker 91cf24 ...
You are using a model of type llava to instantiate a model of type llava_llama. This is not supported for all configurations of models and can yield errors.
2024-10-11 22:23:57 | INFO | stdout |
2024-10-11 22:23:57 | INFO | stdout | Could not create share link. Missing file: /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/frpc_linux_amd64_v0.2.
2024-10-11 22:23:57 | INFO | stdout |
2024-10-11 22:23:57 | INFO | stdout | Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps:
2024-10-11 22:23:57 | INFO | stdout |
2024-10-11 22:23:57 | INFO | stdout | 1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64
2024-10-11 22:23:57 | INFO | stdout | 2. Rename the downloaded file to: frpc_linux_amd64_v0.2
2024-10-11 22:23:57 | INFO | stdout | 3. Move the file to this location: /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio
2024-10-11 22:23:58 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-error-analytics/ "HTTP/1.1 200 OK"
2024-10-11 22:23:58 | INFO | httpx | HTTP Request: POST https://api.gradio.app/gradio-launched-telemetry/ "HTTP/1.1 200 OK"
Loading checkpoint shards: 0%| | 0/3 [00:00<
2024-10-11 22:24:00 | ERROR | stderr | /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/torch/_utils.py:776: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
2024-10-11 22:24:00 | ERROR | stderr | return self.fget.__get__(instance, owner)()
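The 401 above is a consequence of --model-path llava-ftmodel: that string is not an existing local directory, so transformers treats it as a Hub repo id and requests llava-ftmodel/config.json from hf-mirror.com. Pointing the worker at the merged checkpoint's absolute path (as done further down in this thread) avoids the Hub lookup:

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path /home/louey/LLaVA/fted/llava-ftmodel_1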

Lareina2441 commented 16 hours ago

http://192.168.51.15:7860/

Lareina2441 commented 16 hours ago

Launch the controller:

python -m llava.serve.controller --host 0.0.0.0 --port 10000

Launch the Gradio web server:

python -m llava.serve.gradio_web_server --controller http://localhost:10000 --model-list-mode reload

You have just launched the Gradio web interface and can open it with the URL printed on screen. You may notice that the model list is empty; do not worry, as no model worker has been started yet. The list updates automatically when you launch a model worker.

Launch a SGLang worker. This is the recommended way to serve LLaVA models with high throughput, and you need to install SGLang first. Note that 4-bit quantization is not yet supported by SGLang-LLaVA; if your GPU VRAM is limited, check out the model worker with quantization instead.

pip install "sglang[all]"

First launch a SGLang backend worker, which executes the model on the GPU(s). Remember the --port you set here; you will use it later.

Single GPU

CUDA_VISIBLE_DEVICES=0 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000

Multiple GPUs with tensor parallel

CUDA_VISIBLE_DEVICES=0,1 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-13b --tokenizer-path llava-hf/llava-1.5-13b-hf --port 30000 --tp 2

Tokenizers (temporary): llava-hf/llava-1.5-7b-hf, llava-hf/llava-1.5-13b-hf, liuhaotian/llava-v1.6-34b-tokenizer.

Then launch a LLaVA-SGLang worker, which routes requests between the LLaVA controller and the SGLang backend. Set --sgl-endpoint to http://127.0.0.1:port, where port is the one you just set (default: 30000).

python -m llava.serve.sglang_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --sgl-endpoint http://127.0.0.1:30000

Launch a model worker. This is the actual worker that performs the inference on the GPU. Each worker is responsible for a single model, specified by --model-path.

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b

Wait until the model finishes loading and you see "Uvicorn running on ...". Now refresh the Gradio web UI, and you will see the model you just launched in the model list.

You can launch as many workers as you want and compare different model checkpoints in the same Gradio interface. Keep --controller the same, and change --port and --worker to a different port number for each worker.

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port <different from 40000, say 40001> --worker http://localhost:<change accordingly, i.e. 40001> --model-path <ckpt2>

If you are using an Apple device with an M1 or M2 chip, you can specify the mps device with the --device flag: --device mps.

Launch a model worker (multiple GPUs, when GPU VRAM <= 24GB). If your GPU has less than 24GB of VRAM (e.g., RTX 3090, RTX 4090), you can try running with multiple GPUs. The latest code base automatically tries to use multiple GPUs when more than one is available; you can choose which GPUs to use with CUDA_VISIBLE_DEVICES. Below is an example of running with the first two GPUs.

CUDA_VISIBLE_DEVICES=0,1 python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b

Launch a model worker (4-bit, 8-bit inference, quantized). You can launch the model worker with quantized bits (4-bit, 8-bit), which lets you run inference with a reduced GPU memory footprint, potentially on a GPU with as little as 12GB of VRAM. Note that inference with quantized bits may be less accurate than the full-precision model. Simply append --load-4bit or --load-8bit to the model worker command you are executing. Below is an example of running with 4-bit quantization.

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b --load-4bit

Launch a model worker (LoRA weights, unmerged). You can launch the model worker with LoRA weights without merging them into the base checkpoint, which saves disk space. Loading takes a little longer, but inference speed is the same as with merged checkpoints. Unmerged LoRA checkpoints do not have lora-merge in the model name, and are usually much smaller (less than 1GB) than merged checkpoints (13G for 7B, 25G for 13B).

To load unmerged LoRA weights, you simply pass one additional argument, --model-base, which is the base LLM used to train the LoRA weights. You can look up the base LLM of each LoRA weight in the model zoo.

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1-0719-336px-lora-vicuna-13b-v1.3 --model-base lmsys/vicuna-13b-v1.3

CLI inference

Lareina2441 commented 15 hours ago

To manually download the frpc_linux_amd64 file and place it in the required directory, follow these steps:

1. Download the file

Download it with the following command:

wget https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64

2. Rename the file

After the download finishes, rename the file to frpc_linux_amd64_v0.2:

mv frpc_linux_amd64 frpc_linux_amd64_v0.2

3. Move the file to the target directory

Move the file into the directory where the gradio package lives, i.e. /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio:

sudo mv frpc_linux_amd64_v0.2 /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/

Make sure you have permission to read and write that directory; if not, use sudo to elevate privileges.

4. Confirm the file location

Check that the file was moved into place:

ls /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/

The file frpc_linux_amd64_v0.2 should appear in the listing.

After completing these steps, rerun the program and check whether the issue is resolved.
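The same steps as a single command (assumes sudo write access; the chmod is an assumption here, since gradio may also require the binary to be executable):

wget -O frpc_linux_amd64_v0.2 https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 \
  && chmod +x frpc_linux_amd64_v0.2 \
  && sudo mv frpc_linux_amd64_v0.2 /usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/gradio/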

Lareina2441 commented 15 hours ago

#!/bin/bash

# Start the controller
python -m llava.serve.controller --host 0.0.0.0 --port 10000 &
sleep 10  # Wait longer for the controller to start

# Start the Gradio web server
python -m llava.serve.gradio_web_server --controller http://0.0.0.0:10000 --model-list-mode reload --share &
sleep 10  # Wait longer for the web server to start

# Start the first model worker
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path llava-ftmodel &
sleep 10  # Allow more time for the first worker to initialize

# Start the second model worker
python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40001 --worker http://localhost:40001 --model-path liuhaotian/llava-v1.5-13b &

# Optionally, wait for all background processes to finish
wait
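Instead of fixed sleep delays, a hedged alternative is to poll the controller until it answers; the logs above show it serves POST /list_models once it is up:

# wait for the controller before starting the web server and workers
until curl -s -X POST http://localhost:10000/list_models > /dev/null; do
    sleep 1
done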

Lareina2441 commented 15 hours ago

python -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path /home/louey/LLaVA/fted/llava-ftmodel_1

Lareina2441 commented 14 hours ago

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llava 1.2.2.post1 requires gradio==4.16.0, but you have gradio 4.42.0 which is incompatible.
llava 1.2.2.post1 requires gradio-client==0.8.1, but you have gradio-client 1.3.0 which is incompatible.
llava 1.2.2.post1 requires httpx==0.24.0, but you have httpx 0.27.2 which is incompatible.
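A straightforward fix is to pin the exact versions llava 1.2.2.post1 declares (taken verbatim from the error above):

pip install "gradio==4.16.0" "gradio-client==0.8.1" "httpx==0.24.0"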

Lareina2441 commented 14 hours ago

return self.fget.__get__(instance, owner)()
Traceback (most recent call last):
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 791, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 492, in _make_request
raise new_e
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 468, in _make_request
self._validate_conn(conn)
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1097, in _validate_conn
conn.connect()
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 642, in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 783, in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrap_socket(
^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 471, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 515, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 517, in wrap_socket
return self.sslsocket_class._create(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1108, in _create
self.do_handshake()
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1383, in do_handshake
self._sslobj.do_handshake()
ConnectionResetError: [Errno 104] Connection reset by peer

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
resp = conn.urlopen(
^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 845, in urlopen
retries = retries.increment(
^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/retry.py", line 470, in increment
raise reraise(type(error), error, _stacktrace)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/util.py", line 38, in reraise
raise value.with_traceback(tb)
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 791, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 492, in _make_request
raise new_e
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 468, in _make_request
self._validate_conn(conn)
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1097, in _validate_conn
conn.connect()
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 642, in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/connection.py", line 783, in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrap_socket(
^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 471, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 515, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 517, in wrap_socket
return self.sslsocket_class._create(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1108, in _create
self.do_handshake()
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/ssl.py", line 1383, in do_handshake
self._sslobj.do_handshake()
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/louey/LLaVA/llava/serve/cli.py", line 126, in <module>
main(args)
File "/home/louey/LLaVA/llava/serve/cli.py", line 58, in main
image = load_image(args.image_file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/louey/LLaVA/llava/serve/cli.py", line 20, in load_image
response = requests.get(image_file)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/api.py", line 73, in get
return request("get", url, params=params, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Public_Anaconda3/anaconda3/lib/python3.11/site-packages/requests/adapters.py", line 682, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
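The connection reset happens inside load_image in cli.py, which fetches the image with requests.get when given a URL. Passing a local file path instead avoids the network fetch entirely (a hedged sketch; the image path below is hypothetical):

python -m llava.serve.cli --model-path /home/louey/LLaVA/fted/llava-ftmodel_1 --image-file /home/louey/LLaVA/data/images/example.jpg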