Closed zhyncs closed 1 day ago
I left a few comments at https://github.com/sgl-project/sglang/pull/2153
Rename:
- srt/layers/fused_moe -> srt/layers/fused_moe_grok
- srt/layers/triton_fused_moe -> srt/layers/fused_moe_triton
Later, we will probably have srt/layers/fused_moe_turbomind. Then, all these directories will be listed adjacently in alphabetical order.
I left a few comments at #2153
This part of the modification has been completed in this PR.
Rename:
- srt/layers/fused_moe -> srt/layers/fused_moe_grok
- srt/layers/triton_fused_moe -> srt/layers/fused_moe_triton
Later, we will probably have srt/layers/fused_moe_turbomind. Then, all these directories will be listed adjacently in alphabetical order.
I will not make this change in this PR; it will be done in a follow-up PR. The changes themselves are simple, and most of the work is verification. I might update it tomorrow or the day after.
Locally verified ok
[
{
"timestamp": "2024-11-24T04:59:08.994730",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"metrics": {
"en": 0.868,
"en:std": 0.3384907679686405,
"group_latin": 0.868,
"group_latin:std": 0.3384907679686405,
"score:std": 0.3384907679686405,
"score": 0.868
},
"score": 0.868
},
{
"timestamp": "2024-11-24T04:59:57.476733",
"model": "mistralai/Mistral-7B-Instruct-v0.3",
"metrics": {
"en": 0.608,
"en:std": 0.48819668167655544,
"group_latin": 0.608,
"group_latin:std": 0.48819668167655544,
"score:std": 0.48819668167655544,
"score": 0.608
},
"score": 0.608
},
{
"timestamp": "2024-11-24T05:01:26.193959",
"model": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
"metrics": {
"en": 0.868,
"en:std": 0.3384907679686405,
"group_latin": 0.868,
"group_latin:std": 0.3384907679686405,
"score:std": 0.3384907679686405,
"score": 0.868
},
"score": 0.868
},
{
"timestamp": "2024-11-24T05:02:37.732333",
"model": "google/gemma-2-27b-it",
"metrics": {
"en": 0.924,
"en:std": 0.26499811320083017,
"group_latin": 0.924,
"group_latin:std": 0.26499811320083017,
"score:std": 0.26499811320083017,
"score": 0.924
},
"score": 0.924
},
{
"timestamp": "2024-11-24T05:05:53.177669",
"model": "meta-llama/Llama-3.1-70B-Instruct",
"metrics": {
"en": 0.976,
"en:std": 0.15304901175767194,
"group_latin": 0.976,
"group_latin:std": 0.15304901175767194,
"score:std": 0.15304901175767194,
"score": 0.976
},
"score": 0.976
},
{
"timestamp": "2024-11-24T05:08:29.849793",
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"metrics": {
"en": 0.656,
"en:std": 0.4750410508577127,
"group_latin": 0.656,
"group_latin:std": 0.4750410508577127,
"score:std": 0.4750410508577127,
"score": 0.656
},
"score": 0.656
},
{
"timestamp": "2024-11-24T05:12:54.222602",
"model": "Qwen/Qwen2-57B-A14B-Instruct",
"metrics": {
"en": 0.884,
"en:std": 0.320224920954007,
"group_latin": 0.884,
"group_latin:std": 0.320224920954007,
"score:std": 0.320224920954007,
"score": 0.884
},
"score": 0.884
},
{
"timestamp": "2024-11-24T05:14:29.246964",
"model": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
"metrics": {
"en": 0.852,
"en:std": 0.35509998591945907,
"group_latin": 0.852,
"group_latin:std": 0.35509998591945907,
"score:std": 0.35509998591945907,
"score": 0.852
},
"score": 0.852
},
{
"timestamp": "2024-11-24T05:15:34.992883",
"model": "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",
"metrics": {
"en": 0.864,
"en:std": 0.3427885645700568,
"group_latin": 0.864,
"group_latin:std": 0.3427885645700568,
"score:std": 0.3427885645700568,
"score": 0.864
},
"score": 0.864
},
{
"timestamp": "2024-11-24T05:16:33.803717",
"model": "neuralmagic/Mistral-7B-Instruct-v0.3-FP8",
"metrics": {
"en": 0.56,
"en:std": 0.4963869458396342,
"group_latin": 0.56,
"group_latin:std": 0.4963869458396342,
"score:std": 0.4963869458396342,
"score": 0.56
},
"score": 0.56
},
{
"timestamp": "2024-11-24T05:18:15.019246",
"model": "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
"metrics": {
"en": 0.86,
"en:std": 0.34698703145794946,
"group_latin": 0.86,
"group_latin:std": 0.34698703145794946,
"score:std": 0.34698703145794946,
"score": 0.86
},
"score": 0.86
},
{
"timestamp": "2024-11-24T05:19:15.519638",
"model": "neuralmagic/gemma-2-2b-it-FP8",
"metrics": {
"en": 0.64,
"en:std": 0.48,
"group_latin": 0.64,
"group_latin:std": 0.48,
"score:std": 0.48,
"score": 0.64
},
"score": 0.64
},
{
"timestamp": "2024-11-24T05:22:28.640542",
"model": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8",
"metrics": {
"en": 0.964,
"en:std": 0.18629009635512025,
"group_latin": 0.964,
"group_latin:std": 0.18629009635512025,
"score:std": 0.18629009635512025,
"score": 0.964
},
"score": 0.964
},
{
"timestamp": "2024-11-24T05:25:07.266932",
"model": "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
"metrics": {
"en": 0.636,
"en:std": 0.481148625686492,
"group_latin": 0.636,
"group_latin:std": 0.481148625686492,
"score:std": 0.481148625686492,
"score": 0.636
},
"score": 0.636
},
{
"timestamp": "2024-11-24T05:28:10.900791",
"model": "neuralmagic/Qwen2-72B-Instruct-FP8",
"metrics": {
"en": 0.956,
"en:std": 0.20509509989270833,
"group_latin": 0.956,
"group_latin:std": 0.20509509989270833,
"score:std": 0.20509509989270833,
"score": 0.956
},
"score": 0.956
},
{
"timestamp": "2024-11-24T05:30:46.679183",
"model": "neuralmagic/Qwen2-57B-A14B-Instruct-FP8",
"metrics": {
"en": 0.844,
"en:std": 0.3628553430776512,
"group_latin": 0.844,
"group_latin:std": 0.3628553430776512,
"score:std": 0.3628553430776512,
"score": 0.844
},
"score": 0.844
},
{
"timestamp": "2024-11-24T05:32:36.080987",
"model": "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
"metrics": {
"en": 0.876,
"en:std": 0.3295815528818322,
"group_latin": 0.876,
"group_latin:std": 0.3295815528818322,
"score:std": 0.3295815528818322,
"score": 0.876
},
"score": 0.876
},
{
"timestamp": "2024-11-24T05:33:55.184910",
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"metrics": {
"en": 0.844,
"en:std": 0.3628553430776512,
"group_latin": 0.844,
"group_latin:std": 0.3628553430776512,
"score:std": 0.3628553430776512,
"score": 0.844
},
"score": 0.844
},
{
"timestamp": "2024-11-24T05:35:09.540475",
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4",
"metrics": {
"en": 0.844,
"en:std": 0.3628553430776512,
"group_latin": 0.844,
"group_latin:std": 0.3628553430776512,
"score:std": 0.3628553430776512,
"score": 0.844
},
"score": 0.844
}
]
Motivation
Modifications
Checklist