cognitivecomputations / OpenChatML

144 stars 10 forks source link

Add function calling related tokens #8

Closed ropoctl closed 4 months ago

ropoctl commented 4 months ago

Copying Phi-3's special tokens from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/raw/main/tokenizer.json (excerpt):


```json
{
      "id": 32003,
      "content": "<|function_output|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32004,
      "content": "<|tag|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32005,
      "content": "<|function_call|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32006,
      "content": "<|system|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32007,
      "content": "<|end|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32008,
      "content": "<|raw|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32009,
      "content": "<|continue|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32010,
      "content": "<|user|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32011,
      "content": "<|function_list|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32012,
      "content": "<|calc|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32013,
      "content": "<|code|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32014,
      "content": "<|/code|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32015,
      "content": "<|summary|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32016,
      "content": "<|resource|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32017,
      "content": "<|assistant_mask|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32018,
      "content": "<|start|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32019,
      "content": "<|message|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32020,
      "content": "<|fim_prefix|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32021,
      "content": "<|fim_middle|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32022,
      "content": "<|fim_suffix|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32023,
      "content": "<|meta_start|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32024,
      "content": "<|ipynb_marker|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32025,
      "content": "<|diff_marker|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32026,
      "content": "<|ghissue|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32027,
      "content": "<|ghreview|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32028,
      "content": "<|disc_start|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32029,
      "content": "<|disc_sep|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32030,
      "content": "<|disc_thread|><|query|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32031,
      "content": "<|/query|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32032,
      "content": "<|data|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32033,
      "content": "<|/data|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32034,
      "content": "<|sys|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32035,
      "content": "<|/sys|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32036,
      "content": "<|inst|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    },
    {
      "id": 32037,
      "content": "<|/inst|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": true,
      "normalized": false,
      "special": true
    }
  ],
```