Closed nomoneyExpection closed 2 months ago
Here is my modified code:
from transformers import AutoTokenizer, BertModel, RobertaModel, RobertaTokenizerFast, BertTokenizer
def get_tokenlizer(text_encoder_type): if not isinstance(text_encoder_type, str): if hasattr(text_encoder_type, "text_encoder_type"): text_encoder_type = text_encoder_type.text_encoder_type elif text_encoder_type.get("text_encoder_type", False): text_encoder_type = text_encoder_type.get("text_encoder_type") else: raise ValueError( "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) ) print("final text_encoder_type: {}".format(text_encoder_type))
tokenizer_path = "Grounded-Segment-Anything/huggingface/bert-base-uncased" tokenizer = BertTokenizer.from_pretrained(tokenizer_path, use_fast=False) return tokenizer
def get_pretrained_language_model(text_encoder_type): if text_encoder_type == "bert-base-uncased": model_path = "Grounded-Segment-Anything/huggingface/bert-base-uncased/pytorch_model.bin" return BertModel.from_pretrained(model_path) if text_encoder_type == "roberta-base": return RobertaModel.from_pretrained(text_encoder_type) raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type))
But I still get an error: (gsa) D:\forwork\Grounded-Segment-Anything>python grounded_sam_demo.py --config GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py --grounded_checkpoint groundingdino_swint_ogc.pth --sam_checkpoint sam_vit_h_4b8939.pth --input_image assets/demo1.jpg --output_dir "outputs" --box_threshold 0.3 --text_threshold 0.25 --text_prompt "bear" --device "cuda" D:\Anaconda3\envs\gsa\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.) return _VF.meshgrid(tensors, kwargs) # type: ignore[attr-defined] final text_encoder_type: bert-base-uncased Traceback (most recent call last): File "grounded_sam_demo.py", line 181, in model = load_model(config_file, grounded_checkpoint, device=device) File "grounded_sam_demo.py", line 46, in load_model model = build_model(args) File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\modelsinit.py", line 17, in build_model model = build_func(args) File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 372, in build_groundingdino model = GroundingDINO( File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 107, in init* self.tokenizer = get_tokenlizer.get_tokenlizer(text_encoder_type) File "d:\forwork\grounded-segment-anything\groundingdino\groundingdino\util\get_tokenlizer.py", line 45, in get_tokenlizer tokenizer = BertTokenizer.from_pretrained(tokenizer_path, use_fast=False) File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 1654, in from_pretrained fast_tokenizer_file = get_fast_tokenizer_file( File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 3486, in get_fast_tokenizer_file all_files = get_list_of_files( File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\file_utils.py", line 2103, in get_list_of_files return list_repo_files(path_or_repo, revision=revision, token=token) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_deprecation.py", line 103, in inner_f return f(args, **kwargs) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 110, in _inner_fn validate_repo_id(arg_value) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 158, in validate_repo_id raise HFValidationError( huggingface_hub.utils._validators.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'Grounded-Segment-Anything/huggingface/bert-base-uncased'. Use
repo_type
argument if needed.Is there any good solution?
Originally posted by @nomoneyExpection in #75 (comment)
Sry, maybe you're using the wrong repo_type
arguments for this, you should use BertTokenizer.from_pretrained('bert-base-uncased')
instead of Grounded-Segment-Anything/huggingface/bert-base-uncased
Here is my modified code:
from transformers import AutoTokenizer, BertModel, RobertaModel, RobertaTokenizerFast, BertTokenizer
def get_tokenlizer(text_encoder_type): if not isinstance(text_encoder_type, str): if hasattr(text_encoder_type, "text_encoder_type"): text_encoder_type = text_encoder_type.text_encoder_type elif text_encoder_type.get("text_encoder_type", False): text_encoder_type = text_encoder_type.get("text_encoder_type") else: raise ValueError( "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) ) print("final text_encoder_type: {}".format(text_encoder_type))
tokenizer_path = "Grounded-Segment-Anything/huggingface/bert-base-uncased" tokenizer = BertTokenizer.from_pretrained(tokenizer_path, use_fast=False) return tokenizer
def get_pretrained_language_model(text_encoder_type): if text_encoder_type == "bert-base-uncased": model_path = "Grounded-Segment-Anything/huggingface/bert-base-uncased/pytorch_model.bin" return BertModel.from_pretrained(model_path) if text_encoder_type == "roberta-base": return RobertaModel.from_pretrained(text_encoder_type) raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type))
But I still get an error: (gsa) D:\forwork\Grounded-Segment-Anything>python grounded_sam_demo.py --config GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py --grounded_checkpoint groundingdino_swint_ogc.pth --sam_checkpoint sam_vit_h_4b8939.pth --input_image assets/demo1.jpg --output_dir "outputs" --box_threshold 0.3 --text_threshold 0.25 --text_prompt "bear" --device "cuda" D:\Anaconda3\envs\gsa\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.) return _VF.meshgrid(tensors, kwargs) # type: ignore[attr-defined] final text_encoder_type: bert-base-uncased Traceback (most recent call last): File "grounded_sam_demo.py", line 181, in model = load_model(config_file, grounded_checkpoint, device=device) File "grounded_sam_demo.py", line 46, in load_model model = build_model(args) File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\modelsinit.py", line 17, in build_model model = build_func(args) File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 372, in build_groundingdino model = GroundingDINO( File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 107, in init* self.tokenizer = get_tokenlizer.get_tokenlizer(text_encoder_type) File "d:\forwork\grounded-segment-anything\groundingdino\groundingdino\util\get_tokenlizer.py", line 45, in get_tokenlizer tokenizer = BertTokenizer.from_pretrained(tokenizer_path, use_fast=False) File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 1654, in from_pretrained fast_tokenizer_file = get_fast_tokenizer_file( File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 3486, in get_fast_tokenizer_file all_files = get_list_of_files( File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\file_utils.py", line 2103, in get_list_of_files return list_repo_files(path_or_repo, revision=revision, token=token) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_deprecation.py", line 103, in inner_f return f(args, **kwargs) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 110, in _inner_fn validate_repo_id(arg_value) File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 158, in validate_repo_id raise HFValidationError( huggingface_hub.utils._validators.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'Grounded-Segment-Anything/huggingface/bert-base-uncased'. Use
repo_type
argument if needed.Is there any good solution?
Originally posted by @nomoneyExpection in #75 (comment)
你可以参考一下 https://huggingface.co/bert-base-uncased 这个部分,你用了错误的pretrain的模型的远程仓库名称
from transformers import AutoTokenizer, BertModel, RobertaModel, RobertaTokenizerFast, BertTokenizer
def get_tokenlizer(text_encoder_type): if not isinstance(text_encoder_type, str): if hasattr(text_encoder_type, "text_encoder_type"): text_encoder_type = text_encoder_type.text_encoder_type elif text_encoder_type.get("text_encoder_type", False): text_encoder_type = text_encoder_type.get("text_encoder_type") else: raise ValueError( "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) ) print("final text_encoder_type: {}".format(text_encoder_type))
def get_pretrained_language_model(text_encoder_type): if text_encoder_type == "bert-base-uncased": model_path = "Grounded-Segment-Anything/huggingface/bert-base-uncased/pytorch_model.bin" return BertModel.from_pretrained(model_path) if text_encoder_type == "roberta-base": return RobertaModel.from_pretrained(text_encoder_type) raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type))
But I still get an error: (gsa) D:\forwork\Grounded-Segment-Anything>python grounded_sam_demo.py --config GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py --grounded_checkpoint groundingdino_swint_ogc.pth --sam_checkpoint sam_vit_h_4b8939.pth --input_image assets/demo1.jpg --output_dir "outputs" --box_threshold 0.3 --text_threshold 0.25 --text_prompt "bear" --device "cuda" D:\Anaconda3\envs\gsa\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.) return _VF.meshgrid(tensors, *kwargs) # type: ignore[attr-defined] final text_encoder_type: bert-base-uncased Traceback (most recent call last): File "grounded_sam_demo.py", line 181, in
model = load_model(config_file, grounded_checkpoint, device=device)
File "grounded_sam_demo.py", line 46, in load_model
model = build_model(args)
File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models__init__.py", line 17, in build_model
model = build_func(args)
File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 372, in build_groundingdino
model = GroundingDINO(
File "D:\forwork\Grounded-Segment-Anything\GroundingDINO\groundingdino\models\GroundingDINO\groundingdino.py", line 107, in init
self.tokenizer = get_tokenlizer.get_tokenlizer(text_encoder_type)
File "d:\forwork\grounded-segment-anything\groundingdino\groundingdino\util\get_tokenlizer.py", line 45, in get_tokenlizer
tokenizer = BertTokenizer.from_pretrained(tokenizer_path, use_fast=False)
File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 1654, in from_pretrained
fast_tokenizer_file = get_fast_tokenizer_file(
File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\tokenization_utils_base.py", line 3486, in get_fast_tokenizer_file
all_files = get_list_of_files(
File "D:\Anaconda3\envs\gsa\lib\site-packages\transformers\file_utils.py", line 2103, in get_list_of_files
return list_repo_files(path_or_repo, revision=revision, token=token)
File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_deprecation.py", line 103, in inner_f
return f( args, **kwargs)
File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 110, in _inner_fn
validate_repo_id(arg_value)
File "D:\Anaconda3\envs\gsa\lib\site-packages\huggingface_hub\utils_validators.py", line 158, in validate_repo_id
raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'Grounded-Segment-Anything/huggingface/bert-base-uncased'. Use
repo_type
argument if needed.Is there any good solution?
Originally posted by @nomoneyExpection in https://github.com/IDEA-Research/Grounded-Segment-Anything/issues/75#issuecomment-1624577080