Closed ropoctl closed 4 months ago
Copying Phi-3's special tokens from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/raw/main/tokenizer.json
{ "id": 32003, "content": "<|function_output|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32004, "content": "<|tag|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32005, "content": "<|function_call|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32006, "content": "<|system|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32007, "content": "<|end|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32008, "content": "<|raw|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32009, "content": "<|continue|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32010, "content": "<|user|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32011, "content": "<|function_list|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32012, "content": "<|calc|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32013, "content": "<|code|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32014, "content": "<|/code|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32015, "content": "<|summary|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32016, "content": "<|resource|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32017, "content": "<|assistant_mask|>", "single_word": false, "lstrip": false, "rstrip": 
true, "normalized": false, "special": true }, { "id": 32018, "content": "<|start|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32019, "content": "<|message|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32020, "content": "<|fim_prefix|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32021, "content": "<|fim_middle|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32022, "content": "<|fim_suffix|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32023, "content": "<|meta_start|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32024, "content": "<|ipynb_marker|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32025, "content": "<|diff_marker|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32026, "content": "<|ghissue|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32027, "content": "<|ghreview|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32028, "content": "<|disc_start|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32029, "content": "<|disc_sep|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32030, "content": "<|disc_thread|><|query|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32031, "content": "<|/query|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 
32032, "content": "<|data|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32033, "content": "<|/data|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32034, "content": "<|sys|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32035, "content": "<|/sys|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32036, "content": "<|inst|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true }, { "id": 32037, "content": "<|/inst|>", "single_word": false, "lstrip": false, "rstrip": true, "normalized": false, "special": true } ],```
Copying Phi-3's special tokens from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/raw/main/tokenizer.json