# Copyright Huawei Technologies Co., Ltd. 2025. All rights reserved.
import os
import argparse
import sys
from qwen_vl_utils import process_vision_info
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoConfig
# Make the repository root importable so the `example` and `msmodelslim`
# packages below resolve when this script is run directly from its own folder.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.abspath(os.path.join(current_directory, '..', ".."))
sys.path.append(parent_directory)
# NOTE: these imports must stay after the sys.path.append above.
from example.common.utils import cmd_bool
from msmodelslim.pytorch.llm_ptq.anti_outlier import AntiOutlierConfig, AntiOutlier
from msmodelslim.pytorch.llm_ptq.llm_ptq_tools import Calibrator, QuantConfig
# Accepted values for the --device_type CLI switch.
CPU = "cpu"
NPU = "npu"
if __name__ == '__main__':
    # Post-training quantization (W8A8 by default) of a Qwen2-VL checkpoint:
    # load model -> build image calibration set -> outlier suppression -> quantize -> save.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='')
    parser.add_argument('--calib_images', type=str, default='./coco_pic')
    parser.add_argument('--save_directory', type=str, default='')
    parser.add_argument('--part_file_size', type=int, default=None)
    parser.add_argument('--w_bit', type=int, default=8)
    parser.add_argument('--a_bit', type=int, default=8)
    parser.add_argument('--device_type', type=str, choices=[CPU, NPU], default=CPU)
    parser.add_argument('--trust_remote_code', type=cmd_bool, default=False)
    args = parser.parse_args()

    # 1. Load the model. On NPU, device_map="auto" lets transformers place the weights.
    device_map = CPU if args.device_type == CPU else "auto"
    model = Qwen2VLForConditionalGeneration.from_pretrained(args.model_path,
                                                            device_map=device_map,
                                                            trust_remote_code=args.trust_remote_code,
                                                            torch_dtype="auto",
                                                            local_files_only=True).eval()
    config = AutoConfig.from_pretrained(args.model_path,
                                        trust_remote_code=args.trust_remote_code,
                                        local_files_only=True)

    # 2. Load the processor (tokenizer + image preprocessing).
    processor = AutoProcessor.from_pretrained(args.model_path, local_files_only=True)

    # 3. Layers to keep in floating point (excluded from quantization):
    #    the vision merger MLP, every vision block's fc2 projection,
    #    and every LLM layer's down_proj.
    disable_names = ['visual.merger.mlp.0', 'visual.merger.mlp.2']
    disable_names.extend(f'visual.blocks.{i}.mlp.fc2'
                         for i in range(config.vision_config.depth))
    disable_names.extend(f'model.layers.{i}.mlp.down_proj'
                         for i in range(config.num_hidden_layers))

    # 4. Build the calibration dataset: one image + a fixed text prompt per sample.
    #    sorted() makes the calibration order deterministic across filesystems,
    #    so repeated runs produce reproducible quantization statistics.
    calib_data = []
    for image_name in sorted(os.listdir(args.calib_images)):
        image_path = os.path.join(args.calib_images, image_name)
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "image": image_path,
                    },
                    {
                        "type": "text",
                        "text": "Please describe this picture in detail."
                    },
                ]
            }
        ]
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors='pt'
        ).to(args.device_type)
        # Positional argument list for the model forward; the None slots are
        # unused intermediate arguments — their order must not change.
        calib_data.append([inputs['input_ids'], inputs['attention_mask'],
                           None, None, None, None, None, None, None, None,
                           inputs['pixel_values'], None, inputs['image_grid_thw'], None])

    # 5. Outlier suppression (anti-outlier "m2" method) before quantization.
    #    NOTE(review): model.device.index is None when running on CPU — assumed
    #    to be accepted by AntiOutlierConfig; confirm against msmodelslim docs.
    anti_config = AntiOutlierConfig(
        w_bit=args.w_bit,
        a_bit=args.a_bit,
        anti_method="m2",
        dev_type=args.device_type,
        dev_id=model.device.index,
    )
    anti_outlier = AntiOutlier(model, calib_data=calib_data, cfg=anti_config)
    anti_outlier.process()

    # 6. Quantize weights/activations, skipping the fallback layers above.
    quant_config = QuantConfig(
        w_bit=args.w_bit,
        a_bit=args.a_bit,
        disable_names=disable_names,
        dev_type=args.device_type,
        dev_id=model.device.index,
        act_method=2,
        mm_tensor=False,
    )
    calibrator = Calibrator(model, quant_config, calib_data=calib_data, disable_level='L0')
    calibrator.run()

    # 7. Save the quantized weights as safetensors (optionally sharded).
    calibrator.save(args.save_directory, save_type=["safe_tensor"], part_file_size=args.part_file_size)