sgl-project / sglang

SGLang is a fast serving framework for large language models and vision language models.
https://sgl-project.github.io/
Apache License 2.0
6.22k stars 532 forks source link

feat: update other MoE models deps #2156

Closed zhyncs closed 1 day ago

zhyncs commented 1 day ago

Motivation

Modifications

Checklist

merrymercy commented 1 day ago

I left a few comments at https://github.com/sgl-project/sglang/pull/2153

merrymercy commented 1 day ago

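Rename

  • srt/layers/fused_moe -> srt/layers/fused_moe_grok
  • srt/layers/triton_fused_moe -> srt/layers/fused_moe_triton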

Later, we will probably have srt/layers/fused_moe_turbomind. Then, all these files will be listed adjacently in alphabetical order.

zhyncs commented 1 day ago

I left a few comments at #2153

This part of the modification has been completed in this PR.

zhyncs commented 1 day ago

Rename

  • srt/layers/fused_moe -> srt/layers/fused_moe_grok
  • srt/layers/triton_fused_moe -> srt/layers/fused_moe_triton

Later, we will probably have srt/layers/fused_moe_turbomind. Then, all these files will be listed adjacently in alphabetical order.

I will not modify this part in this PR; it will be handled in a follow-up PR. The changes are simple and mostly need verification. I might update it tomorrow or the day after.

zhyncs commented 1 day ago

Locally verified OK:

[
  {
    "timestamp": "2024-11-24T04:59:08.994730",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "metrics": {
      "en": 0.868,
      "en:std": 0.3384907679686405,
      "group_latin": 0.868,
      "group_latin:std": 0.3384907679686405,
      "score:std": 0.3384907679686405,
      "score": 0.868
    },
    "score": 0.868
  },
  {
    "timestamp": "2024-11-24T04:59:57.476733",
    "model": "mistralai/Mistral-7B-Instruct-v0.3",
    "metrics": {
      "en": 0.608,
      "en:std": 0.48819668167655544,
      "group_latin": 0.608,
      "group_latin:std": 0.48819668167655544,
      "score:std": 0.48819668167655544,
      "score": 0.608
    },
    "score": 0.608
  },
  {
    "timestamp": "2024-11-24T05:01:26.193959",
    "model": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    "metrics": {
      "en": 0.868,
      "en:std": 0.3384907679686405,
      "group_latin": 0.868,
      "group_latin:std": 0.3384907679686405,
      "score:std": 0.3384907679686405,
      "score": 0.868
    },
    "score": 0.868
  },
  {
    "timestamp": "2024-11-24T05:02:37.732333",
    "model": "google/gemma-2-27b-it",
    "metrics": {
      "en": 0.924,
      "en:std": 0.26499811320083017,
      "group_latin": 0.924,
      "group_latin:std": 0.26499811320083017,
      "score:std": 0.26499811320083017,
      "score": 0.924
    },
    "score": 0.924
  },
  {
    "timestamp": "2024-11-24T05:05:53.177669",
    "model": "meta-llama/Llama-3.1-70B-Instruct",
    "metrics": {
      "en": 0.976,
      "en:std": 0.15304901175767194,
      "group_latin": 0.976,
      "group_latin:std": 0.15304901175767194,
      "score:std": 0.15304901175767194,
      "score": 0.976
    },
    "score": 0.976
  },
  {
    "timestamp": "2024-11-24T05:08:29.849793",
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "metrics": {
      "en": 0.656,
      "en:std": 0.4750410508577127,
      "group_latin": 0.656,
      "group_latin:std": 0.4750410508577127,
      "score:std": 0.4750410508577127,
      "score": 0.656
    },
    "score": 0.656
  },
  {
    "timestamp": "2024-11-24T05:12:54.222602",
    "model": "Qwen/Qwen2-57B-A14B-Instruct",
    "metrics": {
      "en": 0.884,
      "en:std": 0.320224920954007,
      "group_latin": 0.884,
      "group_latin:std": 0.320224920954007,
      "score:std": 0.320224920954007,
      "score": 0.884
    },
    "score": 0.884
  },
  {
    "timestamp": "2024-11-24T05:14:29.246964",
    "model": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    "metrics": {
      "en": 0.852,
      "en:std": 0.35509998591945907,
      "group_latin": 0.852,
      "group_latin:std": 0.35509998591945907,
      "score:std": 0.35509998591945907,
      "score": 0.852
    },
    "score": 0.852
  },
  {
    "timestamp": "2024-11-24T05:15:34.992883",
    "model": "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",
    "metrics": {
      "en": 0.864,
      "en:std": 0.3427885645700568,
      "group_latin": 0.864,
      "group_latin:std": 0.3427885645700568,
      "score:std": 0.3427885645700568,
      "score": 0.864
    },
    "score": 0.864
  },
  {
    "timestamp": "2024-11-24T05:16:33.803717",
    "model": "neuralmagic/Mistral-7B-Instruct-v0.3-FP8",
    "metrics": {
      "en": 0.56,
      "en:std": 0.4963869458396342,
      "group_latin": 0.56,
      "group_latin:std": 0.4963869458396342,
      "score:std": 0.4963869458396342,
      "score": 0.56
    },
    "score": 0.56
  },
  {
    "timestamp": "2024-11-24T05:18:15.019246",
    "model": "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
    "metrics": {
      "en": 0.86,
      "en:std": 0.34698703145794946,
      "group_latin": 0.86,
      "group_latin:std": 0.34698703145794946,
      "score:std": 0.34698703145794946,
      "score": 0.86
    },
    "score": 0.86
  },
  {
    "timestamp": "2024-11-24T05:19:15.519638",
    "model": "neuralmagic/gemma-2-2b-it-FP8",
    "metrics": {
      "en": 0.64,
      "en:std": 0.48,
      "group_latin": 0.64,
      "group_latin:std": 0.48,
      "score:std": 0.48,
      "score": 0.64
    },
    "score": 0.64
  },
  {
    "timestamp": "2024-11-24T05:22:28.640542",
    "model": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8",
    "metrics": {
      "en": 0.964,
      "en:std": 0.18629009635512025,
      "group_latin": 0.964,
      "group_latin:std": 0.18629009635512025,
      "score:std": 0.18629009635512025,
      "score": 0.964
    },
    "score": 0.964
  },
  {
    "timestamp": "2024-11-24T05:25:07.266932",
    "model": "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
    "metrics": {
      "en": 0.636,
      "en:std": 0.481148625686492,
      "group_latin": 0.636,
      "group_latin:std": 0.481148625686492,
      "score:std": 0.481148625686492,
      "score": 0.636
    },
    "score": 0.636
  },
  {
    "timestamp": "2024-11-24T05:28:10.900791",
    "model": "neuralmagic/Qwen2-72B-Instruct-FP8",
    "metrics": {
      "en": 0.956,
      "en:std": 0.20509509989270833,
      "group_latin": 0.956,
      "group_latin:std": 0.20509509989270833,
      "score:std": 0.20509509989270833,
      "score": 0.956
    },
    "score": 0.956
  },
  {
    "timestamp": "2024-11-24T05:30:46.679183",
    "model": "neuralmagic/Qwen2-57B-A14B-Instruct-FP8",
    "metrics": {
      "en": 0.844,
      "en:std": 0.3628553430776512,
      "group_latin": 0.844,
      "group_latin:std": 0.3628553430776512,
      "score:std": 0.3628553430776512,
      "score": 0.844
    },
    "score": 0.844
  },
  {
    "timestamp": "2024-11-24T05:32:36.080987",
    "model": "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
    "metrics": {
      "en": 0.876,
      "en:std": 0.3295815528818322,
      "group_latin": 0.876,
      "group_latin:std": 0.3295815528818322,
      "score:std": 0.3295815528818322,
      "score": 0.876
    },
    "score": 0.876
  },
  {
    "timestamp": "2024-11-24T05:33:55.184910",
    "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
    "metrics": {
      "en": 0.844,
      "en:std": 0.3628553430776512,
      "group_latin": 0.844,
      "group_latin:std": 0.3628553430776512,
      "score:std": 0.3628553430776512,
      "score": 0.844
    },
    "score": 0.844
  },
  {
    "timestamp": "2024-11-24T05:35:09.540475",
    "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4",
    "metrics": {
      "en": 0.844,
      "en:std": 0.3628553430776512,
      "group_latin": 0.844,
      "group_latin:std": 0.3628553430776512,
      "score:std": 0.3628553430776512,
      "score": 0.844
    },
    "score": 0.844
  }
]