vgel / repeng

A library for making RepE control vectors
https://vgel.me/posts/representation-engineering/
MIT License
435 stars · 31 forks

How to load a different model? And how to avoid useless re-downloads? #21

Closed: trollkotze closed this issue 4 months ago

trollkotze commented 4 months ago

I noticed that AutoTokenizer.from_pretrained(model_name) always re-downloads the model from HF, which seems quite wasteful.

I have no idea where the model goes when it's downloaded, but is there any way to just keep it? I don't want to re-download the same thing over and over again.

And if I want to load another model: when I replace mistralai/Mistral-7B-Instruct-v0.1 with e.g. 152334H/miqu-1-70b-sf, I get an error about a missing tokenizer, which isn't included in that repository. I think I can just use the Llama-2-70b tokenizer here, because that's what this model is based on. If I could put everything in place locally, instead of relying on a fresh download from an HF repo that has to contain everything, I could probably piece it together.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 3
      1 model_name = "152334H/miqu-1-70b-sf"
----> 3 tokenizer = AutoTokenizer.from_pretrained(model_name)
      4 tokenizer.pad_token_id = 0
      6 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

File ~/.cache/pypoetry/virtualenvs/repeng-mpoVJW0L-py3.11/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py:825, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    821     if tokenizer_class is None:
    822         raise ValueError(
    823             f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
    824         )
--> 825     return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    827 # Otherwise we have to be creative.
    828 # if model is an encoder decoder, the encoder tokenizer class is used by default
    829 if isinstance(config, EncoderDecoderConfig):

File ~/.cache/pypoetry/virtualenvs/repeng-mpoVJW0L-py3.11/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:2048, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
   2045     else:
   2046         logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2048 return cls._from_pretrained(
   2049     resolved_vocab_files,
   2050     pretrained_model_name_or_path,
   2051     init_configuration,
   2052     *init_inputs,
   2053     token=token,
   2054     cache_dir=cache_dir,
   2055     local_files_only=local_files_only,
   2056     _commit_hash=commit_hash,
   2057     _is_local=is_local,
   2058     trust_remote_code=trust_remote_code,
   2059     **kwargs,
   2060 )

File ~/.cache/pypoetry/virtualenvs/repeng-mpoVJW0L-py3.11/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:2287, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
   2285 # Instantiate the tokenizer.
   2286 try:
-> 2287     tokenizer = cls(*init_inputs, **init_kwargs)
   2288 except OSError:
   2289     raise OSError(
   2290         "Unable to load vocabulary from file. "
   2291         "Please check that the provided vocabulary is accessible and not corrupted."
   2292     )

File ~/.cache/pypoetry/virtualenvs/repeng-mpoVJW0L-py3.11/lib/python3.11/site-packages/transformers/models/llama/tokenization_llama_fast.py:133, in LlamaTokenizerFast.__init__(self, vocab_file, tokenizer_file, clean_up_tokenization_spaces, unk_token, bos_token, eos_token, add_bos_token, add_eos_token, use_default_system_prompt, add_prefix_space, **kwargs)
    128     logger.warning_once(
    129         "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers"
    130     )
    131     kwargs["from_slow"] = True
--> 133 super().__init__(
    134     vocab_file=vocab_file,
    135     tokenizer_file=tokenizer_file,
    136     clean_up_tokenization_spaces=clean_up_tokenization_spaces,
    137     unk_token=unk_token,
    138     bos_token=bos_token,
    139     eos_token=eos_token,
    140     add_bos_token=add_bos_token,
    141     add_eos_token=add_eos_token,
    142     use_default_system_prompt=use_default_system_prompt,
    143     **kwargs,
    144 )
    145 self._add_bos_token = add_bos_token
    146 self._add_eos_token = add_eos_token

File ~/.cache/pypoetry/virtualenvs/repeng-mpoVJW0L-py3.11/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:120, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
    118     fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
    119 else:
--> 120     raise ValueError(
    121         "Couldn't instantiate the backend tokenizer from one of: \n"
    122         "(1) a `tokenizers` library serialization file, \n"
    123         "(2) a slow tokenizer instance to convert or \n"
    124         "(3) an equivalent slow tokenizer class to instantiate and convert. \n"
    125         "You need to have sentencepiece installed to convert a slow tokenizer to a fast one."
    126     )
    128 self._tokenizer = fast_tokenizer
    130 if slow_tokenizer is not None:

ValueError: Couldn't instantiate the backend tokenizer from one of: 
(1) a `tokenizers` library serialization file, 
(2) a slow tokenizer instance to convert or 
(3) an equivalent slow tokenizer class to instantiate and convert. 
You need to have sentencepiece installed to convert a slow tokenizer to a fast one.
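(The workaround described above, loading the tokenizer from the base-model repo while taking the weights from the miqu repo, might look like the sketch below. This is an assumption on my part, not something from the repeng docs: the `meta-llama/Llama-2-70b-hf` repo is gated and needs an HF token, and per the error message you may also need `pip install sentencepiece`.)

```python
# Weights repo from the issue; it ships no tokenizer files.
MODEL_REPO = "152334H/miqu-1-70b-sf"
# Base model the weights are reportedly derived from (assumed; gated repo).
TOKENIZER_REPO = "meta-llama/Llama-2-70b-hf"


def load_model_and_tokenizer():
    """Load the tokenizer and model weights from two different repos."""
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)
    tokenizer.pad_token_id = 0
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO, torch_dtype=torch.float16
    )
    return model, tokenizer
```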
trollkotze commented 4 months ago

Okay, sorry for being lazy and stupid. Apparently one can just use local relative paths by prefixing them with './', and that's just standard behavior of the transformers library. https://github.com/huggingface/transformers/issues/14336
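(So the local-path approach from the linked issue would look roughly like this: any argument that resolves to an existing directory is treated as a local model folder rather than a Hub repo id, and nothing is downloaded. The helper and its checks are my own sketch, not part of repeng.)

```python
from pathlib import Path


def load_from_local_dir(model_dir: str):
    """Load a model and tokenizer from a local directory, offline.

    model_dir should contain the usual files (config.json, tokenizer files,
    weight shards), e.g. assembled by hand from several repos.
    """
    path = Path(model_dir)
    if not path.is_dir():
        raise FileNotFoundError(f"no local model directory at {path}")

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(str(path))
    model = AutoModelForCausalLM.from_pretrained(str(path))
    return model, tokenizer
```

Usage would then be e.g. `load_from_local_dir("./miqu-1-70b-sf")` after placing the tokenizer files next to the weights.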