diff --git "a/msmodelslim/docs/\345\244\247\346\250\241\345\236\213\345\267\262\351\252\214\350\257\201\345\210\227\350\241\250.md" "b/msmodelslim/docs/\345\244\247\346\250\241\345\236\213\345\267\262\351\252\214\350\257\201\345\210\227\350\241\250.md" index d32e304953d337212b2e543f6e0b2f261129ea50..66359c47edd9b7ae8d474a9ebb1a451ec5c4dcbf 100644 --- "a/msmodelslim/docs/\345\244\247\346\250\241\345\236\213\345\267\262\351\252\214\350\257\201\345\210\227\350\241\250.md" +++ "b/msmodelslim/docs/\345\244\247\346\250\241\345\236\213\345\267\262\351\252\214\350\257\201\345\210\227\350\241\250.md" @@ -1,34 +1,64 @@ -| 模型名称 | w8a16 | w8a8| 稀疏量化| -| --- | --- | --- | --- | -| BaiChuan2-13B | - | √ | - | -| BaiChuan2-7B | - | √ | - | -| ChatGLM2-6B | - | √ | √ | -| ChatGLM3-6B | - | - | - | -| CodeGeeX2-6B | - | √ | - | -| CodeLLama-34B | - | √ | √ | -| Gemma-7B | - | √ | - | -| LLaMA2-13B | - | √ | - | -| LLaMA-33B | - | - | √ | -| LLaMA2-70B | √ | √ | - | -| LLaMA2-7B | - | √ | - | -| LLaMA3-70B | √ | - | - | -| LLaMA3.1-70B | - | √ | - | -| LLaMA-65B | √ | √ | - | -| DeepSeek-V2 | - | √ | - | -| Qwen2.5-72B | - | √ | - | -| Qwen2.5-57B | - | √ | - | -| Qwen2.5-32B | - | √ | - | -| Qwen2.5-14B | - | √ | - | -| Qwen2.5-7B | - | √ | - | -| Qwen1.5-32B | - | √ | - | -| Qwen1.5-72B | √ | - | - | -| Qwen1.5-chat 72B | - | - | - | -| Qwen1.5-chat-14B | - | √ | √ | -| Qwen-14B | - | √ | - | -| Qwen-72B | √ | - | - | -| StarCoder-15.5B | - | √ | - | -| starcoder2-15B | - | √ | - | -| llava-1.6-13B | √ | - | - | -| Stable Diffusion XL | - | √ | - | -| Qwen-VL | - | √ | - | -| Llava-1.5-7b-hf | - | √ | - | \ No newline at end of file +| 模型名称 | w8a16 | w8a8 | 稀疏量化 | kv cache量化 | +| --- | --- | --- | --- | --- | +| BaiChuan2-13B | √ | √ | - | - | +| BaiChuan2-7B | √ | √ | - | - | +| bloom-176B | √ | - | - | - | +| ChatGLM2-6B | - | √ | √ | - | +| ChatGLM3-6B | - | - | - | - | +| ChatGLM4-9B | - | √ | √ | √ | +| CodeGeeX2-6B | - | √ | - | - | +| CodeLLama-34B | - | √ | √ | - | +| 
DeepSeek-V2-16B | √ | √ | - | - | +| DeepSeek-V2-236B | √ | √ | - | - | +| DeepSeek-Coder-33B | √ | √ | - | √ | +| DeepSeek-V3 | - | √ | - | - | +| DeepSeek-V3-0324 | - | √ | - | - | +| DeepSeek-R1 | - | √ | - | - | +| DeepSeek-R1-Distill-Llama-8B | - | √ | √ | - | +| DeepSeek-R1-Distill-Llama-70B | - | √ | - | - | +| DeepSeek-R1-Distill-Qwen-1.5B | - | √ | √ | - | +| DeepSeek-R1-Distill-Qwen-7B | - | √ | √ | - | +| DeepSeek-R1-Distill-Qwen-14B | - | √ | √ | - | +| DeepSeek-R1-Distill-Qwen-32B | - | √ | √ | - | +| Gemma-7B | - | √ | - | - | +| Hunyuan-A52B-Instruct | - | √ | - | - | +| InternLM2-20B | √ | √ | - | √ | +| InternVL2-8B | - | √ | - | - | +| InternVL2-40B | - | √ | - | - | +| LLaMA-33B | - | - | √ | - | +| LLaMA-65B | √ | √ | - | - | +| LLaMA2-13B | - | √ | √ | - | +| LLaMA2-7B | - | √ | √ | - | +| LLaMA2-70B | √ | √ | - | - | +| LLaMA3-70B | √ | - | - | - | +| LLaMA3.1-8B | - | √ | - | - | +| LLaMA3.1-70B | - | √ | √ | √ | +| Llava-1.5-7b-hf | - | √ | - | - | +| Llava-1.6-13B | √ | - | - | - | +| Mixtral-8x7B-Instruct | - | √ | - | - | +| Qwen-7B | - | √ | - | - | +| Qwen-14B | - | √ | - | - | +| Qwen-72B | √ | - | - | - | +| Qwen1.5-14B | - | √ | √ | - | +| Qwen1.5-32B | - | √ | - | - | +| Qwen1.5-72B | √ | - | - | - | +| Qwen1.5-110B | √ | - | - | - | +| Qwen2-7B | - | √ | √ | - | +| Qwen2-72B | √ | √ | √ | √ | +| Qwen2.5-7B | - | √ | √ | - | +| Qwen2.5-14B | - | √ | √ | - | +| Qwen2.5-32B | - | √ | - | - | +| Qwen2.5-72B | - | √ | - | √ | +| QwenCode2.5-7B | - | - | √ | - | +| Qwen-VL | - | √ | - | - | +| Qwen2-VL-7B | - | √ | - | - | +| Qwen2-VL-72B | - | √ | - | - | +| QwQ-32B | - | √ | - | - | +| StarCoder-15.5B | - | √ | - | - | +| StarCoder2-15B | - | √ | - | - | +| Stable Diffusion XL | - | √ | - | - | +| Telechat-12B-v2 | - | - | √ | - | +| Telechat-7B | - | √ | - | - | +| Telechat2-7B | - | √ | - | - | +| Telechat2-35B | - | √ | - | - | +| Telechat2-115B | - | √ | - | - | \ No newline at end of file