From 14da26ec33143437688f6b5b4636434c8675ac22 Mon Sep 17 00:00:00 2001 From: aojiaosaiban <2993174330@qq.com> Date: Mon, 25 Aug 2025 13:46:30 +0800 Subject: [PATCH 1/4] add MoE support for obfuscation.py --- mindarmour/model_protection/obfuscation.py | 56 +++++++++++++++------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/mindarmour/model_protection/obfuscation.py b/mindarmour/model_protection/obfuscation.py index 083a3c8..0de023c 100644 --- a/mindarmour/model_protection/obfuscation.py +++ b/mindarmour/model_protection/obfuscation.py @@ -248,6 +248,7 @@ class ModelObfuscator: save_metadata = config.get('save_metadata', False) metadata_op_name = config.get('metadata_op') layers = config.get('layers') + experts = config.get('experts') if not layers: if not obf_metadata.get(name): @@ -257,12 +258,21 @@ class ModelObfuscator: saved_metadata[name] = obf_tensor else: for layer in layers: - strTemplate = Template(name) - obf_name = strTemplate.safe_substitute({"layer": str(layer)}) - obf_tensor = self._gen_obfuscate_tensor(config.get('shape'), config.get('type')) - obf_metadata[obf_name] = obf_tensor - if save_metadata: - saved_metadata[name] = obf_tensor + replacements = {"layer": str(layer)} + if experts: + for expert in experts: + replacements["expert"] = str(expert) + obf_name = Template(name).safe_substitute(replacements) + obf_tensor = self._gen_obfuscate_tensor(config.get('shape'), config.get('type')) + obf_metadata[obf_name] = obf_tensor + if save_metadata: + saved_metadata[name] = obf_tensor + else: + obf_name = Template(name).safe_substitute(replacements) + obf_tensor = self._gen_obfuscate_tensor(config.get('shape'), config.get('type')) + obf_metadata[obf_name] = obf_tensor + if save_metadata: + saved_metadata[name] = obf_tensor return obf_metadata, saved_metadata def set_metadata(self, new_metadata): @@ -346,6 +356,7 @@ class ModelObfuscator: raise TypeError('{} should be dict type, but got {}'.format(obf_target, type(obf_target))) target = 
obf_target.get('target', None) layers = obf_target.get('layers', []) + experts = obf_target.get('experts', []) obf_ops = obf_target.get('weight_obf_ops', None) if not target or not obf_ops: raise KeyError("target or obf_ops is None.") @@ -359,16 +370,29 @@ class ModelObfuscator: LOGGER.info(TAG, "obfuscate weight: {} success.".format(item)) not_obfuscated_params.remove(item) for layer in layers: - strTemplate = Template(target) - target_path = strTemplate.safe_substitute({"layer": str(layer)}) - if target_path == param_path: - obf_param = _obfuscate_param(param, obf_metadata, obf_ops, layer) - if obf_param is None: - LOGGER.error(TAG, "obfuscate weight {} failed.".format(item)) - return False - params[item] = obf_param - LOGGER.info(TAG, "obfuscate weight: {} success.".format(item)) - not_obfuscated_params.remove(item) + replacements = {"layer": str(layer)} + if experts: + for expert in experts: + replacements["expert"] = str(expert) + target_path = Template(target).safe_substitute(replacements) + if target_path == param_path: + obf_param = _obfuscate_param(param, obf_metadata, obf_ops, layer) + if obf_param is None: + LOGGER.error(TAG, "obfuscate weight {} failed.".format(item)) + return False + params[item] = obf_param + LOGGER.info(TAG, "obfuscate weight: {} success.".format(item)) + not_obfuscated_params.remove(item) + else: + target_path = Template(target).safe_substitute(replacements) + if target_path == param_path: + obf_param = _obfuscate_param(param, obf_metadata, obf_ops, layer) + if obf_param is None: + LOGGER.error(TAG, "obfuscate weight {} failed.".format(item)) + return False + params[item] = obf_param + LOGGER.info(TAG, "obfuscate weight: {} success.".format(item)) + not_obfuscated_params.remove(item) return True def _obfuscate_safetensor_files(self, src_path, saved_path='./'): -- Gitee From 3b6f6a0b20d8f13da780b5e5b54a024885c17b10 Mon Sep 17 00:00:00 2001 From: aojiaosaiban <2993174330@qq.com> Date: Mon, 25 Aug 2025 20:42:03 +0800 Subject: [PATCH 
2/4] add support for deepseekv3 weight obfuscation and infernece under obfuscation --- .../infer/deepseekv3_obfuscate_inference.py | 54 +++ .../infer/network_patch/ObfuscateFreqsMgr.py | 370 ++++++++++++++++++ .../deepseekv3_ms_network_obfuscate.py | 196 ++++++++++ .../config/deepseekv3_obf_config.yaml | 316 +++++++++++++++ .../deepseekv3_weight_obfuscate.py | 119 ++++++ 5 files changed, 1055 insertions(+) create mode 100644 examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py create mode 100644 examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py create mode 100644 examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py create mode 100644 examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml create mode 100644 examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py diff --git a/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py b/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py new file mode 100644 index 0000000..63d97ad --- /dev/null +++ b/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py @@ -0,0 +1,54 @@ +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from network_patch import deepseekv3_ms_network_obfuscate # Add this line on the top of script. 
+# from network_patch import qwen2_5_7b_instruct_ms_network_obfuscate +import time +import sys +import vllm_mindspore +from vllm import LLM, SamplingParams + +# Sample prompts. +prompts = [ + "I am", + "Today is", + "Llama is" +] + +def test_deepseekv3_obfuscate_inference(model_path): + # Create a sampling params object. + sampling_params = SamplingParams(temperature=0.0, top_p=0.95, max_tokens=50) + + # Create a LLM + llm = LLM(model=model_path, tensor_parallel_size=2) # Path of the obfuscated weight files + # Generate texts from the prompts. The output is a list of RequestOutput objects + # that contain the prompt, generated text, and other information. + start_time = time.perf_counter() + outputs = llm.generate(prompts, sampling_params) + end_time = time.perf_counter() + elapsed_time = end_time - start_time + # Print the outputs. + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}. Generated text: {generated_text!r}") + print(f"Infernce time: {elapsed_time} seconds") + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Usage: python deepseekv3_obfuscate_inference.py ") + + sys.exit(1) + model_path = sys.argv[1] + test_deepseekv3_obfuscate_inference(model_path) \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py b/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py new file mode 100644 index 0000000..bb103d6 --- /dev/null +++ b/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py @@ -0,0 +1,370 @@ +import numpy as np +import math +from enum import Enum + +from mindspore.ops import operations as P +from mindspore.common.initializer import initializer, Tensor, Normal +import mindspore.common.dtype as mstype +from mindspore.nn.cell import Cell +from mindformers.tools.utils import is_pynative + +from mindspore.parallel._utils import _get_parallel_mode, 
_is_sharding_propagation +from mindspore.context import ParallelMode + +from mindformers.modules.layers import _yarn_find_correction_dim, _yarn_find_correction_range, _yarn_get_mscale, _yarn_linear_ramp_mask, _check_llama3_scaling_factor, _check_yarn_scaling_factor, _check_linear_scaling_factor, SeqExtendMethod + +class ObfuscateFreqsMgr(Cell): + r"""freqs_cis manager.""" + + def __init__(self, + head_dim, + seq_length=None, + max_position_embedding=4096, + rotary_dtype=mstype.float16, + theta=10000, + scaling_factor=1.0, + extend_method=SeqExtendMethod.NONE.value, + parallel_config=None, + is_dynamic=False, + limit_not_apply_seq_pipe=False): + super().__init__() + self.is_pynative = is_pynative() + if seq_length is not None and seq_length > max_position_embedding: + max_position_embedding = seq_length + if extend_method == SeqExtendMethod.NTK.value: + theta *= scaling_factor + freqs_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) # (head_dim // 2, ) + freqs = 1.0 / (theta ** (freqs_base / head_dim)) # (head_dim // 2, ) + mscale = 1.0 + if extend_method == SeqExtendMethod.LINEAR.value: + _check_linear_scaling_factor(scaling_factor) + factor = scaling_factor["factor"] + freqs /= factor + + if extend_method == SeqExtendMethod.YARN.value: + _check_yarn_scaling_factor(scaling_factor, max_position_embedding) + factor = scaling_factor["factor"] + beta_fast = scaling_factor["beta_fast"] + beta_slow = scaling_factor["beta_slow"] + base = theta + original_max_position_embeddings = scaling_factor["original_max_position_embeddings"] + mscale_all_dim = scaling_factor["mscale_all_dim"] + mscale_ = scaling_factor["mscale"] + + internal_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) + internal_freq = 1.0 / (factor * theta ** (internal_freq_base / head_dim)) + + extra_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) + extra_freq = 1.0 / (theta ** (extra_freq_base / head_dim)) + + low, high = 
_yarn_find_correction_range(beta_fast, beta_slow, head_dim, base, + original_max_position_embeddings) + inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, head_dim // 2) + freqs = internal_freq * (1 - inv_freq_mask) + extra_freq * inv_freq_mask + mscale = float(_yarn_get_mscale(factor, mscale_) + / _yarn_get_mscale(factor, mscale_all_dim)) + + if extend_method == SeqExtendMethod.LLAMA3.value: + _check_llama3_scaling_factor(scaling_factor, max_position_embedding) + + factor = scaling_factor["factor"] + if factor is None or not isinstance(factor, float) or factor < 1.0: + raise ValueError(f"`scaling_factor`'s factor field must be a float >= 1, got {factor}") + + factor = scaling_factor["factor"] + low_freq_factor = scaling_factor["low_freq_factor"] + high_freq_factor = scaling_factor["high_freq_factor"] + old_context_len = scaling_factor["original_max_position_embeddings"] + + low_freq_wavelen = old_context_len / low_freq_factor + high_freq_wavelen = old_context_len / high_freq_factor + new_freqs = [] + for freq in freqs: + wavelen = 2 * math.pi / freq + if wavelen < high_freq_wavelen: + new_freqs.append(freq) + elif wavelen > low_freq_wavelen: + new_freqs.append(freq / factor) + else: + if low_freq_wavelen == high_freq_wavelen: + raise ValueError(f"low_freq_wavelen should not equal high_freq_wavelen, " + f"but low_freq_wavelen got {low_freq_wavelen}," + f"high_freq_wavelen got {high_freq_wavelen}.") + smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor) + new_freqs.append((1 - smooth) * freq / factor + smooth * freq) + freqs = np.array(new_freqs, dtype=freqs.dtype) + + if extend_method == SeqExtendMethod.PI.value: + t = np.arange(0, max_position_embedding / scaling_factor, 1 / scaling_factor).astype(np.float32) + else: + t = np.arange(0, max_position_embedding, 1).astype(np.float32) + + freqs = np.outer(t, freqs) # (max_position_embedding, head_dim // 2) + phase_shift = np.random.uniform(-0.1, 0.1, size=(head_dim // 
2,)).astype(np.float32) + freqs = freqs + phase_shift + emb = np.concatenate((freqs, freqs), axis=-1) + freqs_cos = np.cos(emb) * mscale # (seq_len, head_dim) + freqs_sin = np.sin(emb) * mscale # (seq_len, head_dim) + swap_mask = ObfuscateFreqsMgr.get_swap_mask(head_dim) + + if parallel_config is not None and parallel_config.context_parallel > 1: + self.context_parallel = parallel_config.context_parallel + else: + self.context_parallel = 1 + self.head_dim = head_dim + self.is_dynamic = is_dynamic + self.freqs_cos = Tensor(freqs_cos, dtype=rotary_dtype) + self.freqs_sin = Tensor(freqs_sin, dtype=rotary_dtype) + self.swap_mask = Tensor(swap_mask, dtype=rotary_dtype) + + self.reshape = P.Reshape() + self.slice = P.StridedSlice() + self.gather = P.Gather() + self.tile = P.Tile() + if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL, ParallelMode.SEMI_AUTO_PARALLEL): + self.slice.shard(((1, 1),)) + self.gather.shard(((1, 1), (1,))) + self.tile.shard(((1, 1),)) + self.seq_pipe = parallel_config and parallel_config.seq_split_num and parallel_config.seq_split_num > 1 \ + and not limit_not_apply_seq_pipe + if self.seq_pipe: + self.seq_split_num = parallel_config.seq_split_num + self.seq_seg_len = seq_length // self.seq_split_num + np_range = np.arange(self.seq_seg_len) + self.seq_seg_range = Tensor(np_range, dtype=mstype.int32) + self.add_seq = P.Add() + + def construct(self, seq_length=None, seq_chunk=None): + """Get freqs_cos and freqs_sin""" + if self.seq_pipe: + seg_seq_range = self.add_seq(self.seq_seg_range, self.seq_seg_len * seq_chunk) + freqs_cos = self.gather(self.freqs_cos, seg_seq_range, 0) + freqs_sin = self.gather(self.freqs_sin, seg_seq_range, 0) + else: + freqs_cos = self.slice(self.freqs_cos, (0, 0), (seq_length, self.head_dim), (1, 1)) + freqs_sin = self.slice(self.freqs_sin, (0, 0), (seq_length, self.head_dim), (1, 1)) + freqs_cos = self.reshape(freqs_cos, (-1, 1, seq_length, self.head_dim)) + freqs_sin = self.reshape(freqs_sin, (-1, 1, seq_length, 
self.head_dim)) + return freqs_cos, freqs_sin, self.swap_mask + + def prefill(self, bs, seq_length): + if self.is_dynamic and not self.is_pynative: + return self.freqs_cos, self.freqs_sin, self.swap_mask + freqs_cos = self.tile(self.slice(self.freqs_cos, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) + freqs_sin = self.tile(self.slice(self.freqs_sin, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) + return freqs_cos, freqs_sin, self.swap_mask + + def increment(self, batch_valid_length): + indices = batch_valid_length - 1 + freqs_cos = self.gather(self.freqs_cos, indices, 0) + freqs_sin = self.gather(self.freqs_sin, indices, 0) + return freqs_cos, freqs_sin, self.swap_mask + + def increment_multi_ids(self, indices): + indices = indices.reshape(-1) + freqs_cos = self.gather(self.freqs_cos, indices, 0) + freqs_sin = self.gather(self.freqs_sin, indices, 0) + return freqs_cos, freqs_sin, self.swap_mask + + def chunk_with_decode(self, seq_range): + """Obtain the position encoding of chunks and increments""" + freqs_cos = self.gather(self.freqs_cos, seq_range, 0) + freqs_sin = self.gather(self.freqs_sin, seq_range, 0) + return freqs_cos, freqs_sin, self.swap_mask + + @staticmethod + def get_swap_mask(head_dim): + """Swap matrix""" + zero_block = np.zeros((head_dim // 2, head_dim // 2), dtype=np.float32) + id_block = np.identity(head_dim // 2, dtype=np.float32) + return np.block([[zero_block, id_block], [-id_block, zero_block]]) + +# class ObfuscateFreqsMgr(Cell): +# r"""freqs_cis manager.""" + +# def __init__(self, +# head_dim, +# seq_length=None, +# max_position_embedding=4096, +# rotary_dtype=mstype.float16, +# theta=10000, +# scaling_factor=1.0, +# extend_method=SeqExtendMethod.NONE.value, +# parallel_config=None, +# is_dynamic=False, +# limit_not_apply_seq_pipe=False): +# super().__init__() +# self.is_pynative = is_pynative() +# if seq_length is not None and seq_length > max_position_embedding: +# max_position_embedding = seq_length +# if extend_method 
== SeqExtendMethod.NTK.value: +# theta *= scaling_factor +# freqs_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) # (head_dim // 2, ) +# freqs = 1.0 / (theta ** (freqs_base / head_dim)) # (head_dim // 2, ) +# mscale = 1.0 +# if extend_method == SeqExtendMethod.LINEAR.value: +# _check_linear_scaling_factor(scaling_factor) +# factor = scaling_factor["factor"] +# freqs /= factor + +# if extend_method == SeqExtendMethod.YARN.value: +# _check_yarn_scaling_factor(scaling_factor, max_position_embedding) +# factor = scaling_factor["factor"] +# beta_fast = scaling_factor["beta_fast"] +# beta_slow = scaling_factor["beta_slow"] +# base = theta +# original_max_position_embeddings = scaling_factor["original_max_position_embeddings"] +# mscale_all_dim = scaling_factor["mscale_all_dim"] +# mscale_ = scaling_factor["mscale"] + +# internal_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) +# internal_freq = 1.0 / (factor * theta ** (internal_freq_base / head_dim)) + +# extra_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) +# extra_freq = 1.0 / (theta ** (extra_freq_base / head_dim)) + +# low, high = _yarn_find_correction_range(beta_fast, beta_slow, head_dim, base, +# original_max_position_embeddings) +# inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, head_dim // 2) +# freqs = internal_freq * (1 - inv_freq_mask) + extra_freq * inv_freq_mask +# mscale = float(_yarn_get_mscale(factor, mscale_) +# / _yarn_get_mscale(factor, mscale_all_dim)) + +# if extend_method == SeqExtendMethod.LLAMA3.value: +# _check_llama3_scaling_factor(scaling_factor, max_position_embedding) + +# factor = scaling_factor["factor"] +# if factor is None or not isinstance(factor, float) or factor < 1.0: +# raise ValueError(f"`scaling_factor`'s factor field must be a float >= 1, got {factor}") + +# factor = scaling_factor["factor"] +# low_freq_factor = scaling_factor["low_freq_factor"] +# high_freq_factor = 
scaling_factor["high_freq_factor"] +# old_context_len = scaling_factor["original_max_position_embeddings"] + +# low_freq_wavelen = old_context_len / low_freq_factor +# high_freq_wavelen = old_context_len / high_freq_factor +# new_freqs = [] +# for freq in freqs: +# wavelen = 2 * math.pi / freq +# if wavelen < high_freq_wavelen: +# new_freqs.append(freq) +# elif wavelen > low_freq_wavelen: +# new_freqs.append(freq / factor) +# else: +# if low_freq_wavelen == high_freq_wavelen: +# raise ValueError(f"low_freq_wavelen should not equal high_freq_wavelen, " +# f"but low_freq_wavelen got {low_freq_wavelen}," +# f"high_freq_wavelen got {high_freq_wavelen}.") +# smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor) +# new_freqs.append((1 - smooth) * freq / factor + smooth * freq) +# freqs = np.array(new_freqs, dtype=freqs.dtype) + +# if extend_method == SeqExtendMethod.PI.value: +# t = np.arange(0, max_position_embedding / scaling_factor, 1 / scaling_factor).astype(np.float32) +# else: +# t = np.arange(0, max_position_embedding, 1).astype(np.float32) + +# freqs = np.outer(t, freqs) # (max_position_embedding, head_dim // 2) +# phase_shift = np.random.uniform(-0.1, 0.1, size=(head_dim // 2,)).astype(np.float32) +# freqs_q = freqs + phase_shift +# freqs_k = freqs - phase_shift + +# emb_q = np.concatenate((freqs_q, freqs_q), axis=-1) +# freqs_cos_q = np.cos(emb_q) * mscale # (seq_len, head_dim) +# freqs_sin_q = np.sin(emb_q) * mscale # (seq_len, head_dim) + +# emb_k = np.concatenate((freqs_k, freqs_k), axis=-1) +# freqs_cos_k = np.cos(emb_k) * mscale # (seq_len, head_dim) +# freqs_sin_k = np.sin(emb_k) * mscale # (seq_len, head_dim) + +# swap_mask = ObfuscateFreqsMgr.get_swap_mask(head_dim) + + +# if parallel_config is not None and parallel_config.context_parallel > 1: +# self.context_parallel = parallel_config.context_parallel +# else: +# self.context_parallel = 1 +# self.head_dim = head_dim +# self.is_dynamic = is_dynamic +# 
self.freqs_cos_q = Tensor(freqs_cos_q, dtype=rotary_dtype) +# self.freqs_sin_q = Tensor(freqs_sin_q, dtype=rotary_dtype) +# self.freqs_cos_k = Tensor(freqs_cos_k, dtype=rotary_dtype) +# self.freqs_sin_k = Tensor(freqs_sin_k, dtype=rotary_dtype) +# self.swap_mask = Tensor(swap_mask, dtype=rotary_dtype) + +# self.reshape = P.Reshape() +# self.slice = P.StridedSlice() +# self.gather = P.Gather() +# self.tile = P.Tile() +# if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL, ParallelMode.SEMI_AUTO_PARALLEL): +# self.slice.shard(((1, 1),)) +# self.gather.shard(((1, 1), (1,))) +# self.tile.shard(((1, 1),)) +# self.seq_pipe = parallel_config and parallel_config.seq_split_num and parallel_config.seq_split_num > 1 \ +# and not limit_not_apply_seq_pipe +# if self.seq_pipe: +# self.seq_split_num = parallel_config.seq_split_num +# self.seq_seg_len = seq_length // self.seq_split_num +# np_range = np.arange(self.seq_seg_len) +# self.seq_seg_range = Tensor(np_range, dtype=mstype.int32) +# self.add_seq = P.Add() + +# def construct(self, seq_length=None, seq_chunk=None): +# """Get freqs_cos and freqs_sin""" +# if self.seq_pipe: +# seg_seq_range = self.add_seq(self.seq_seg_range, self.seq_seg_len * seq_chunk) +# freqs_cos_q = self.gather(self.freqs_cos_q, seg_seq_range, 0) +# freqs_sin_q = self.gather(self.freqs_sin_q, seg_seq_range, 0) +# freqs_cos_k = self.gather(self.freqs_cos_k, seg_seq_range, 0) +# freqs_sin_k = self.gather(self.freqs_sin_k, seg_seq_range, 0) +# else: +# freqs_cos_q = self.slice(self.freqs_cos_q, (0, 0), (seq_length, self.head_dim), (1, 1)) +# freqs_sin_q = self.slice(self.freqs_sin_q, (0, 0), (seq_length, self.head_dim), (1, 1)) +# freqs_cos_k = self.slice(self.freqs_cos_k, (0, 0), (seq_length, self.head_dim), (1, 1)) +# freqs_sin_k = self.slice(self.freqs_sin_k, (0, 0), (seq_length, self.head_dim), (1, 1)) +# freqs_cos_q = self.reshape(freqs_cos_q, (-1, 1, seq_length, self.head_dim)) +# freqs_sin_q = self.reshape(freqs_sin_q, (-1, 1, seq_length, 
self.head_dim)) +# freqs_cos_k = self.reshape(freqs_cos_k, (-1, 1, seq_length, self.head_dim)) +# freqs_sin_k = self.reshape(freqs_sin_k, (-1, 1, seq_length, self.head_dim)) +# return freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, self.swap_mask + +# def prefill(self, bs, seq_length): +# if self.is_dynamic and not self.is_pynative: +# return self.freqs_cos_q, self.freqs_sin_q, self.freqs_cos_k, self.freqs_sin_k, self.swap_mask +# freqs_cos_q = self.tile(self.slice(self.freqs_cos_q, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) +# freqs_sin_q = self.tile(self.slice(self.freqs_sin_q, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) +# freqs_cos_k = self.tile(self.slice(self.freqs_cos_k, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) +# freqs_sin_k = self.tile(self.slice(self.freqs_sin_k, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) +# return freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, self.swap_mask + +# def increment(self, batch_valid_length): +# indices = batch_valid_length - 1 +# freqs_cos_q = self.gather(self.freqs_cos_q, indices, 0) +# freqs_sin_q = self.gather(self.freqs_sin_q, indices, 0) +# freqs_cos_k = self.gather(self.freqs_cos_k, indices, 0) +# freqs_sin_k = self.gather(self.freqs_sin_k, indices, 0) +# return freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, self.swap_mask + +# def increment_multi_ids(self, indices): +# indices = indices.reshape(-1) +# freqs_cos_q = self.gather(self.freqs_cos_q, indices, 0) +# freqs_sin_q = self.gather(self.freqs_sin_q, indices, 0) +# freqs_cos_k = self.gather(self.freqs_cos_k, indices, 0) +# freqs_sin_k = self.gather(self.freqs_sin_k, indices, 0) +# return freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, self.swap_mask + +# def chunk_with_decode(self, seq_range): +# """Obtain the position encoding of chunks and increments""" +# freqs_cos_q = self.gather(self.freqs_cos_q, seq_range, 0) +# freqs_sin_q = self.gather(self.freqs_sin_q, seq_range, 0) +# freqs_cos_k = 
self.gather(self.freqs_cos_k, seq_range, 0) +# freqs_sin_k = self.gather(self.freqs_sin_k, seq_range, 0) +# return freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, self.swap_mask + +# @staticmethod +# def get_swap_mask(head_dim): +# """Swap matrix""" +# zero_block = np.zeros((head_dim // 2, head_dim // 2), dtype=np.float32) +# id_block = np.identity(head_dim // 2, dtype=np.float32) +# return np.block([[zero_block, id_block], [-id_block, zero_block]]) \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py new file mode 100644 index 0000000..76210f2 --- /dev/null +++ b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py @@ -0,0 +1,196 @@ +import numpy as np +import mindspore as ms +import mindspore.common.dtype as mstype +from mindspore import Parameter, Tensor, mint, nn, ops +from mindspore.ops import operations as P +from mindspore.common.initializer import initializer + +from research.deepseek3.deepseek3_model_infer import DeepseekV3Model, DeepseekV3Attention +from vllm_mindspore.model_executor.models.mf_models.deepseekv3_weight_processor import DeepseekV3WeightProcessor +from mindformers.modules.layers import FreqsMgr, SeqExtendMethod + +import ObfuscateFreqsMgr + +_orig_init = DeepseekV3Model.__init__ + +def _patched_init(self, config, *args, **kwargs): + _orig_init(self, config, *args, **kwargs) + self.token_p = Parameter(Tensor(np.arange(config.hidden_size), mstype.int32), + name='p_inv', parallel_optimizer=False) + self.emb_p_inv = Parameter(Tensor(np.arange(config.vocab_size), mstype.int32), + name='emb_p_inv', parallel_optimizer=False) + self.permute = ops.Gather().set_device('CPU') + self.recover = ops.Gather().set_device('CPU') + self.freqs_mgr = ObfuscateFreqsMgr(head_dim=self.qk_rope_head_dim, + seq_length=config.seq_length, + 
max_position_embedding=config.max_position_embeddings, + rotary_dtype=config.rotary_dtype, + theta=config.theta, + scaling_factor=config.scaling_factor, + extend_method=config.extend_method, + is_dynamic=config.is_dynamic) + +DeepseekV3Model.__init__ = _patched_init + + +def _patched_construct(self, tokens: Tensor, batch_valid_length=None, batch_index=None, zactivate_len=None, + block_tables=None, slot_mapping=None): + """ + Forward of deepseekv3 model. + + Args: + tokens: the tokenized inputs with datatype int32 + batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental + prediction. Tensor of shape :math:`(batch_size,)`. Default None. + batch_index(Tensor): The generated batch index when use continuous batching in LLM serving. + Tensor of shape :math:`(batch_size,)`. Default None. + zactivate_len(Tensor): The slice length of KVCache when use dynamic shape infer. + Tensor of shape :math:`(seq_length,)`. Default None. + block_tables(Tensor[int64]): Store mapping tables for each sequence. + slot_mapping(Tensor[int32]): Store token cache physical slot index. 
+ + Returns: + output: Tensor, the output of deepseekv3 decoderlayer + """ + # preprocess + bs, seq_len = self.shape(tokens) + mask = None + if self.use_past: + if self.is_first_iteration: + freqs_cis = self.freqs_mgr.prefill(bs, seq_len) + if not self.is_pynative: + mask = self.casual_mask.prefill() + else: + mask = self.casual_mask(tokens) + else: + freqs_cis = self.freqs_mgr.increment(batch_valid_length) + + else: + mask = self.casual_mask(tokens) + freqs_cis = self.freqs_mgr(seq_len) + + tokens = self.permute(self.token_p, tokens, 0) + h = self.cast(self.tok_embeddings(tokens), self.dtype) + h = self.reshape(h, (bs, seq_len, self.hidden_size)) + + for i in range(self.num_layers): + h = self.layers[i](h, freqs_cis, mask, batch_valid_length=batch_valid_length, + block_tables=block_tables, slot_mapping=slot_mapping) + h = self.cast(h, mstype.float16) + h = self.recover(h, self.emb_p_inv, axis=1) + h = self.cast(h, self.dtype) + output = self.norm_out(h) + return output + +DeepseekV3Model.construct = _patched_construct + + +_orig_infer_convert_outer_weight = DeepseekV3WeightProcessor.infer_convert_outer_weight + +def _patched_infer_convert_outer_weight(self, src_hf_dir, hf_weight_map): + """convert weight not in model""" + _orig_infer_convert_outer_weight(self, src_hf_dir, hf_weight_map) + + token_p_hf_name = "model.token_p" + token_p_ms_name = self.convert_weight_name(token_p_hf_name) + np_data, _ = self.get_safetensor_from_file(token_p_hf_name, src_hf_dir, hf_weight_map) + self.parameter_dict[token_p_ms_name] = ms.Parameter(ms.from_numpy(np_data).astype(ms.int32), + name=token_p_ms_name, + requires_grad=False) + emb_p_hf_name = "model.emb_p_inv" + emb_p_ms_name = self.convert_weight_name(emb_p_hf_name) + np_data, _ = self.get_safetensor_from_file(emb_p_hf_name, src_hf_dir, hf_weight_map) + self.parameter_dict[emb_p_ms_name] = ms.Parameter(ms.from_numpy(np_data).astype(ms.int32), + name=emb_p_ms_name, + requires_grad=False) + + 
+DeepseekV3WeightProcessor.infer_convert_outer_weight = _patched_infer_convert_outer_weight + +# def _patched_attention_construct(self, x: Tensor, freqs_cis: Tuple[Tensor, Tensor], mask=None, batch_valid_length=None, +# block_tables=None, slot_mapping=None): +# """ Forward process of the DeepseekV3Attention. """ +# ori_dtype = x.dtype + +# if self.q_lora_rank == 0: +# bs, seq_len, _ = self.shape(x) +# q = self.q_proj(x) +# latent_kv_all = self.kv2l(x) +# latent_kv, k_pe = mint.split(latent_kv_all, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) +# else: +# if self.qkv_concat: +# qkv2l = self.qkv2l(x) +# q, latent_kv, k_pe = mint.split(qkv2l, [self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim], +# dim=-1) +# bs, seq_len, _ = self.shape(q) +# norm_q = self.lq_norm(q) +# q = self.l2q_proj(norm_q) +# else: +# q = self.q2l_proj(x) +# bs, seq_len, _ = self.shape(q) +# norm_q = self.lq_norm(q) +# q = self.l2q_proj(norm_q) +# latent_kv_all = self.kv2l(x) +# latent_kv, k_pe = mint.split(latent_kv_all, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + +# q = self.reshape(q, (bs, seq_len, self.n_local_heads, self.q_head_dim)) +# q_nope, q_pe = mint.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) +# i_kv = self.lkv_norm(latent_kv) + +# k_pe = self.reshape(k_pe, (bs, seq_len, 1, self.qk_rope_head_dim)) + +# freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, swap_mask = freqs_cis +# freqs_cis_q = (freqs_cos_q, freqs_sin_q, swap_mask) +# freqs_cis_k = (freqs_cos_k, freqs_sin_k, swap_mask) + +# q_pe, _ = self.apply_rotary_emb(q_pe, k_pe, freqs_cis_q, batch_valid_length) +# _, k_pe = self.apply_rotary_emb(q_pe, k_pe, freqs_cis_k, batch_valid_length) + +# q_pe = self.reshape(q_pe, (bs, seq_len, self.n_local_heads, self.qk_rope_head_dim)) +# k_pe = self.reshape(k_pe, (bs, seq_len, 1, self.qk_rope_head_dim)) + +# key_states_cache = self.kpe_concat((i_kv, k_pe.view(bs, seq_len, self.qk_rope_head_dim))) +# key_out = 
self.infer_attention.paged_attention_mgr(key_states_cache, slot_mapping) +# q_nope = ops.depend(q_nope, key_out) + +# if self.is_first_iteration: +# o_k_nope = self.lkv2kv_k_nope(i_kv) +# o_v = self.lkv2kv_v(i_kv) +# k_nope = self.reshape(o_k_nope, (bs, seq_len, self.n_local_heads, self.qk_nope_head_dim)) +# value_states = self.reshape(o_v, (bs, seq_len, self.n_local_heads, self.v_head_dim)) +# query_states = self.pe_concat((q_nope, q_pe)) +# k_pe = self.tile_kv(k_pe, (1, 1, self.n_local_heads, 1)) +# key_states = self.pe_concat((k_nope, k_pe)) +# value_states = self.pe_concat((value_states, k_pe)) + +# key_states = key_states.view(bs, seq_len, -1) +# value_states = value_states.view(bs, seq_len, -1) +# query_states = query_states.view(bs, seq_len, -1) + +# context_layer = self.infer_attention(query_states, key_states, value_states, batch_valid_length, +# block_tables, mask) + +# context_layer = context_layer.view(bs, seq_len, self.n_local_heads, self.q_head_dim) +# context_layer = self.dim_slice_4d(context_layer, (0, 0, 0, 0), (bs, seq_len, self.n_local_heads, +# self.v_head_dim)) +# attn_out = context_layer.view(bs, seq_len, self.n_local_heads * self.v_head_dim) +# output = self.wo(attn_out) +# output = self.cast(output, ori_dtype) +# return output + +# q_absorb = self.lkv2kv_k_nope.weight.view(self.n_local_heads, self.qk_nope_head_dim, self.kv_lora_rank) +# out_absorb = self.lkv2kv_v.weight.view(self.n_local_heads, self.v_head_dim, self.kv_lora_rank) +# q_nope = self.qabsorb_matmul(q_nope.transpose(0, 2, 1, 3), q_absorb).transpose(0, 2, 1, 3) +# query_states = self.pe_concat((q_nope, q_pe)) +# query_states = query_states.view(bs, seq_len, -1) +# key_states = key_states_cache +# context_layer = self.infer_attention(query_states, key_states, key_states, batch_valid_length, +# block_tables, attn_mask=mask) +# context_layer = context_layer.view(bs, seq_len, self.n_local_heads, -1).transpose(0, 2, 1, 3) +# attn_out = self.outabsorb_matmul(context_layer, 
out_absorb).transpose(0, 2, 1, 3) +# attn_out = attn_out.view(bs, seq_len, self.n_local_heads * self.v_head_dim) +# output = self.wo(attn_out) +# output = self.cast(output, ori_dtype) +# return output + +# DeepseekV3Attention.construct = _patched_attention_construct \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml new file mode 100644 index 0000000..6368a67 --- /dev/null +++ b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml @@ -0,0 +1,316 @@ +obf_metadata_config: +- name: token_p + shape: [129280, ] + type: rearrange + save_metadata: True +- name: token_p_inv + shape: [129280, ] + type: rearrange + save_metadata: False +- name: emb_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: emb_p_inv + shape: [7168, ] + type: rearrange + save_metadata: True +- name: qa_p + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qa_p_inv + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qb_p + shape: [24576, ] + type: rearrange + save_metadata: False +- name: kva_p + shape: [576, ] + type: rearrange + save_metadata: False +- name: kva_p_nope + shape: [512, ] + type: rearrange + save_metadata: False +- name: kva_p_nope_inv + shape: [512, ] + type: rearrange + save_metadata: False +- name: kvb_p + shape: [32768, ] + type: random + save_metadata: False +- name: vb_p_inv + shape: [16384, ] + type: random + save_metadata: False +- name: o_p + shape: [7168, ] + type: random + save_metadata: False +- name: o_p_inv + shape: [7168, ] + type: random + save_metadata: False +- name: mlp_up_p + shape: [18432, ] + type: random + save_metadata: False +- name: mlp_up_p_inv + shape: [18432, ] + type: random + save_metadata: False +- name: mlp_down_p + shape: [7168, ] + type: random + save_metadata: False +- name: expert_up_p + shape: [2048, ] + type: 
random + save_metadata: False +- name: expert_up_p_inv + shape: [2048, ] + type: random + save_metadata: False +- name: expert_down_p + shape: [7168, ] + type: random + save_metadata: False +- name: shared_expert_up_p + shape: [2048, ] + type: random + save_metadata: False +- name: shared_expert_up_p_inv + shape: [2048, ] + type: random + save_metadata: False +- name: shared_expert_down_p + shape: [7168, ] + type: random + save_metadata: False + +weight_obf_config: +- target: model/embed_tokens/weight + weight_obf_ops: + - name: permuate + input_x: weight + input_y: token_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/self_attn/q_a_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: qa_p + axis: 1 + +- target: model/layers/${layer}/self_attn/q_a_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p + axis: 1 + +- target: model/layers/${layer}/self_attn/q_b_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: qb_p + axis: 1 + +- target: model/layers/${layer}/self_attn/kv_a_proj_with_mqa/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: kva_p + axis: 1 + +- target: model/layers/${layer}/self_attn/kv_a_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope + axis: 1 + +- target: model/layers/${layer}/self_attn/kv_b_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope_inv + axis: 0 + - name: permuate + input_x: weight + input_y: kvb_p + axis: 1 + +- target: model/layers/${layer}/self_attn/o_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: vb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: o_p + axis: 1 + +- target: model/layers/${layer}/mlp/gate_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/up_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/down_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: mlp_up_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: mlp_down_p + axis: 1 + +- target: model/layers/${layer}/input_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + 
input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/post_attention_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/mlp/gate/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/mlp/shared_experts/gate_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/shared_experts/up_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/shared_experts/down_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: shared_expert_up_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: shared_expert_down_p_inv + axis: 1 + +- target: model/layers/${layer}/mlp/experts/${expert}/gate_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/experts/${expert}/up_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 
166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 1 + +- target: model/layers/${layer}/mlp/experts/${expert}/down_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: 
permuate + input_x: weight + input_y: expert_up_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: expert_down_p + axis: 1 \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py b/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py new file mode 100644 index 0000000..03843c1 --- /dev/null +++ b/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py @@ -0,0 +1,119 @@ +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import yaml +import numpy as np +from mindarmour import ModelObfuscator + +def inv_permutation(p): + inv_p = [0]*len(p) + for old_idx, new_idx in enumerate(p): + inv_p[new_idx] = old_idx + return inv_p + +def gen_colums_permuate_list_MLA(hidden_size_1, hidden_size_2, heads): + pi = [] + pi_1_one = np.random.permutation(np.arange(0, hidden_size_1)) + pi_2_one = np.random.permutation(np.arange(0, hidden_size_2)) + total_dims = hidden_size_1 + hidden_size_2 + for i in range(heads): + pi += (pi_1_one + i * total_dims).tolist() + pi += (pi_2_one + i * total_dims + hidden_size_1).tolist() + return pi, pi_1_one.tolist(), pi_2_one.tolist() + +def repeat_permute(permute_list, heads): + pi = [] + permute_arr = np.array(permute_list) + for i in range(heads): + pi += (permute_arr + i * len(permute_list)).tolist() + return pi + +def test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path): + with open(obf_config_path, 'r') as f: + obf_config = yaml.safe_load(f) + obf = ModelObfuscator(obf_config, obfuscate_scale=100) + vocab_size = 129280 + hidden_size = 7168 + num_heads = 128 + layers = 61 + q_lora_rank = 1536 + kv_lora_rank = 512 + qk_nope_head_dim = 128 + qk_rope_head_dim = 64 + v_head_dim = 128 + MLP_inter_dim = 18432 + moe_inter_dim = 2048 + shared_moe_inter_dim = 2048 + + token_p, _, _ = gen_colums_permuate_list_MLA(vocab_size, 0, 1) + token_p_inv = inv_permutation(token_p) + emb_p, _, _ = gen_colums_permuate_list_MLA(hidden_size, 0 ,1) + emb_p_inv = inv_permutation(emb_p) + + qa_p, _, _ = gen_colums_permuate_list_MLA(q_lora_rank, 0, 1) + qa_p_inv = inv_permutation(qa_p) + + qb_p, qb_p_nope_one, qb_p_rope = gen_colums_permuate_list_MLA(qk_nope_head_dim, qk_rope_head_dim, num_heads) + + # _, kva_p, kva_p_rope = gen_colums_permuate_list_MLA(kv_lora_rank, qk_rope_head_dim, 1) + # kva_p_inv = inv_permutation(kva_p) + + _, kva_p_nope, _ = gen_colums_permuate_list_MLA(kv_lora_rank, 0, 1) + ka_p_rope = qb_p_rope + kva_p = kva_p_nope + 
(np.array(ka_p_rope) + len(kva_p_nope)).tolist() + kva_p_nope_inv = inv_permutation(kva_p_nope) + + + kb_p_nope_one = qb_p_nope_one + vb_p, vb_p_one, _ = gen_colums_permuate_list_MLA(v_head_dim, 0, num_heads) + kvb_p_one = kb_p_nope_one + (np.array(vb_p_one) + len(kb_p_nope_one)).tolist() + kvb_p = repeat_permute(kvb_p_one, num_heads) + vp_b_inv = inv_permutation(vb_p) + + # _, o_p, _ = gen_colums_permuate_list_MLA(hidden_size, 0, 1) + o_p = emb_p + o_p_inv = inv_permutation(o_p) + + mlp_up_p, _, _ = gen_colums_permuate_list_MLA(MLP_inter_dim, 0, 1) + mlp_up_p_inv = inv_permutation(mlp_up_p) + mlp_down_p = emb_p + + expert_up_p, _, _ = gen_colums_permuate_list_MLA(moe_inter_dim, 0, 1) + expert_up_p_inv = inv_permutation(expert_up_p) + expert_down_p = emb_p + + shared_expert_up_p = gen_colums_permuate_list_MLA(shared_moe_inter_dim, 0, 1) + shared_expert_up_p_inv = inv_permutation(shared_expert_up_p) + shared_expert_down_p = emb_p + + + + obf_metadata = {"token_p": np.array(token_p), "token_p_inv": np.array(token_p_inv), "emb_p": np.array(emb_p), "emb_p_inv" : np.array(emb_p_inv), "qa_p" : np.array(qa_p), "qa_p_inv" : np.array(qa_p_inv), "qb_p" : np.array(qb_p), "kva_p" : np.array(kva_p), "kva_p_nope" :np.array(kva_p_nope), "kva_p_nope_inv" : np.array(kva_p_nope_inv), "kvb_p" :np.array(kvb_p), "vb_p_inv" :np.array(vp_b_inv), "o_p" :np.array(o_p), "op_inv" :np.array(o_p_inv), + "mlp_up_p" : np.array(mlp_up_p), "mlp_up_p_inv" : np.array(mlp_up_p_inv), "mlp_down_p" : np.array(mlp_down_p), "expert_up_p" : np.array(expert_up_p), "expert_up_p_inv" :np.array(expert_up_p_inv), "expert_down_p" : np.array(expert_down_p), + "shared_expert_up_p" : np.array(shared_expert_up_p), "shared_expert_up_p_inv" : np.array(shared_expert_up_p_inv), "shared_expert_down_p" : np.array(shared_expert_down_p)} + + obf.set_metadata(obf_metadata) + metadata_mapping = {} + metadata_mapping['model.token_p'] = "token_p" + metadata_mapping['model.emb_p_inv'] = "emb_pi_inv" + 
obf.set_save_metadata_mapping(metadata_mapping) + obf.obfuscate_weight_files(src_path, saved_path=saved_path) + +if __name__ == '__main__': + if len(sys.argv) != 4: + print("Usage: python deepseekv3_weight_obfuscate.py ") + sys.exit(1) + src_path, saved_path, obf_config_path = sys.argv[1], sys.argv[2], sys.argv[3] + test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path) \ No newline at end of file -- Gitee From 2e780c322b262f2338dd5585e2b2d00257a8440e Mon Sep 17 00:00:00 2001 From: aojiaosaiban <2993174330@qq.com> Date: Sun, 31 Aug 2025 10:58:17 +0800 Subject: [PATCH 3/4] support rope protection --- .../infer/network_patch/ObfuscateFreqsMgr.py | 5 +++- .../deepseekv3_ms_network_obfuscate.py | 15 +++++++++- .../config/deepseekv3_obf_config.yaml | 4 +++ .../deepseekv3_weight_obfuscate.py | 29 ++++++++++++++----- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py b/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py index bb103d6..33400a4 100644 --- a/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py +++ b/examples/model_protection/deepseekv3/infer/network_patch/ObfuscateFreqsMgr.py @@ -26,7 +26,9 @@ class ObfuscateFreqsMgr(Cell): extend_method=SeqExtendMethod.NONE.value, parallel_config=None, is_dynamic=False, - limit_not_apply_seq_pipe=False): + limit_not_apply_seq_pipe=False, + rope_p=None, + rope_permute=None): super().__init__() self.is_pynative = is_pynative() if seq_length is not None and seq_length > max_position_embedding: @@ -100,6 +102,7 @@ class ObfuscateFreqsMgr(Cell): t = np.arange(0, max_position_embedding, 1).astype(np.float32) freqs = np.outer(t, freqs) # (max_position_embedding, head_dim // 2) + freqs = rope_permute(freqs, rope_p, axis=1) phase_shift = np.random.uniform(-0.1, 0.1, size=(head_dim // 2,)).astype(np.float32) freqs = freqs + phase_shift emb = np.concatenate((freqs, freqs), 
axis=-1) diff --git a/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py index 76210f2..21af387 100644 --- a/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py +++ b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py @@ -19,8 +19,12 @@ def _patched_init(self, config, *args, **kwargs): name='p_inv', parallel_optimizer=False) self.emb_p_inv = Parameter(Tensor(np.arange(config.vocab_size), mstype.int32), name='emb_p_inv', parallel_optimizer=False) + self.rope_p = Parameter(Tensor(np.arange(config.hidden_size), mstype.int32), + name='rope_p', parallel_optimizer=False) + self.permute = ops.Gather().set_device('CPU') self.recover = ops.Gather().set_device('CPU') + self.rope_permute = ops.Gather().set_device('CPU') self.freqs_mgr = ObfuscateFreqsMgr(head_dim=self.qk_rope_head_dim, seq_length=config.seq_length, max_position_embedding=config.max_position_embeddings, @@ -28,7 +32,9 @@ def _patched_init(self, config, *args, **kwargs): theta=config.theta, scaling_factor=config.scaling_factor, extend_method=config.extend_method, - is_dynamic=config.is_dynamic) + is_dynamic=config.is_dynamic, + rope_p=self.rope_p, + rope_permute=self.rope_permute) DeepseekV3Model.__init__ = _patched_init @@ -103,6 +109,13 @@ def _patched_infer_convert_outer_weight(self, src_hf_dir, hf_weight_map): self.parameter_dict[emb_p_ms_name] = ms.Parameter(ms.from_numpy(np_data).astype(ms.int32), name=emb_p_ms_name, requires_grad=False) + + rope_p_hf_name = "model.rope_p" + rope_p_ms_name = self.convert_weight_name(rope_p_hf_name) + np_data, _ = self.get_safetensor_from_file(rope_p_hf_name, src_hf_dir, hf_weight_map) + self.parameter_dict[rope_p_ms_name] = ms.Parameter(ms.from_numpy(np_data).astype(ms.int32), + name=rope_p_ms_name, + requires_grad=False) 
DeepseekV3WeightProcessor.infer_convert_outer_weight = _patched_infer_convert_outer_weight diff --git a/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml index 6368a67..49ce26b 100644 --- a/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml +++ b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config.yaml @@ -1,4 +1,8 @@ obf_metadata_config: +- name: rope_p + shape: [32, ] + type: rearrange + save_metadata: True - name: token_p shape: [129280, ] type: rearrange diff --git a/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py b/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py index 03843c1..c4b74cc 100644 --- a/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py +++ b/examples/model_protection/deepseekv3/obfuscation/deepseekv3_weight_obfuscate.py @@ -23,10 +23,16 @@ def inv_permutation(p): inv_p[new_idx] = old_idx return inv_p -def gen_colums_permuate_list_MLA(hidden_size_1, hidden_size_2, heads): +def gen_colums_permuate_list_MLA(hidden_size_1, hidden_size_2, heads, rope_contained = False): pi = [] pi_1_one = np.random.permutation(np.arange(0, hidden_size_1)) - pi_2_one = np.random.permutation(np.arange(0, hidden_size_2)) + if rope_contained: + pi_2_one_base_sequence = np.arange(0, hidden_size_2) + groups = [pi_2_one_base_sequence[i*2:(i+1)*2] for i in range(hidden_size_2 // 2)] + np.random.shuffle(groups) + pi_2_one = np.concatenate(groups) + else: + pi_2_one = np.random.permutation(np.arange(0, hidden_size_2)) total_dims = hidden_size_1 + hidden_size_2 for i in range(heads): pi += (pi_1_one + i * total_dims).tolist() @@ -39,7 +45,15 @@ def repeat_permute(permute_list, heads): for i in range(heads): pi += (permute_arr + i * len(permute_list)).tolist() return pi - + +def get_rope_permute_list(rope_list: list): 
+ rope_permute_list = [] + for i in range(0, len(rope_list), 2): + first_element = rope_list[i] + original_group_idx = first_element // 2 + rope_permute_list.append(original_group_idx) + return rope_permute_list + def test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path): with open(obf_config_path, 'r') as f: obf_config = yaml.safe_load(f) @@ -65,8 +79,8 @@ def test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path): qa_p, _, _ = gen_colums_permuate_list_MLA(q_lora_rank, 0, 1) qa_p_inv = inv_permutation(qa_p) - qb_p, qb_p_nope_one, qb_p_rope = gen_colums_permuate_list_MLA(qk_nope_head_dim, qk_rope_head_dim, num_heads) - + qb_p, qb_p_nope_one, qb_p_rope = gen_colums_permuate_list_MLA(qk_nope_head_dim, qk_rope_head_dim, num_heads, True) + rope_p = get_rope_permute_list(qb_p_rope) # _, kva_p, kva_p_rope = gen_colums_permuate_list_MLA(kv_lora_rank, qk_rope_head_dim, 1) # kva_p_inv = inv_permutation(kva_p) @@ -94,7 +108,7 @@ def test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path): expert_up_p_inv = inv_permutation(expert_up_p) expert_down_p = emb_p - shared_expert_up_p = gen_colums_permuate_list_MLA(shared_moe_inter_dim, 0, 1) + shared_expert_up_p, _, _ = gen_colums_permuate_list_MLA(shared_moe_inter_dim, 0, 1) shared_expert_up_p_inv = inv_permutation(shared_expert_up_p) shared_expert_down_p = emb_p @@ -102,12 +116,13 @@ def test_deepseekv3_weight_obfuscate(src_path, saved_path, obf_config_path): obf_metadata = {"token_p": np.array(token_p), "token_p_inv": np.array(token_p_inv), "emb_p": np.array(emb_p), "emb_p_inv" : np.array(emb_p_inv), "qa_p" : np.array(qa_p), "qa_p_inv" : np.array(qa_p_inv), "qb_p" : np.array(qb_p), "kva_p" : np.array(kva_p), "kva_p_nope" :np.array(kva_p_nope), "kva_p_nope_inv" : np.array(kva_p_nope_inv), "kvb_p" :np.array(kvb_p), "vb_p_inv" :np.array(vp_b_inv), "o_p" :np.array(o_p), "op_inv" :np.array(o_p_inv), "mlp_up_p" : np.array(mlp_up_p), "mlp_up_p_inv" : np.array(mlp_up_p_inv), 
"mlp_down_p" : np.array(mlp_down_p), "expert_up_p" : np.array(expert_up_p), "expert_up_p_inv" :np.array(expert_up_p_inv), "expert_down_p" : np.array(expert_down_p), - "shared_expert_up_p" : np.array(shared_expert_up_p), "shared_expert_up_p_inv" : np.array(shared_expert_up_p_inv), "shared_expert_down_p" : np.array(shared_expert_down_p)} + "shared_expert_up_p" : np.array(shared_expert_up_p), "shared_expert_up_p_inv" : np.array(shared_expert_up_p_inv), "shared_expert_down_p" : np.array(shared_expert_down_p), "rope_p" : np.array(rope_p)} obf.set_metadata(obf_metadata) metadata_mapping = {} metadata_mapping['model.token_p'] = "token_p" metadata_mapping['model.emb_p_inv'] = "emb_pi_inv" + metadata_mapping['model.rope_p'] = "rope_p" obf.set_save_metadata_mapping(metadata_mapping) obf.obfuscate_weight_files(src_path, saved_path=saved_path) -- Gitee From 5b6ef85e2063eaf198053622906a76738269e290 Mon Sep 17 00:00:00 2001 From: aojiaosaiban <2993174330@qq.com> Date: Sun, 14 Sep 2025 21:40:12 +0800 Subject: [PATCH 4/4] add support for Deepseek V3 obfuscation and infernce under obfuscation --- .../infer/deepseekv3_obfuscate_inference.py | 120 ++++--- .../deepseekv3_ms_network_obfuscate.py | 224 +++++------- .../rope_patch/ObfuscateFreqsMgr.py | 185 ++++++++++ .../network_patch/rope_patch/__init__.py | 0 .../deepseekv3_obf_config_huggingface.yaml | 320 ++++++++++++++++++ .../config/deepseekv3_obf_config_ms.yaml | 320 ++++++++++++++++++ mindarmour/model_protection/obfuscation.py | 5 +- 7 files changed, 993 insertions(+), 181 deletions(-) create mode 100644 examples/model_protection/deepseekv3/infer/network_patch/rope_patch/ObfuscateFreqsMgr.py create mode 100644 examples/model_protection/deepseekv3/infer/network_patch/rope_patch/__init__.py create mode 100644 examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_huggingface.yaml create mode 100644 examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_ms.yaml diff --git 
a/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py b/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py index 63d97ad..594bf5a 100644 --- a/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py +++ b/examples/model_protection/deepseekv3/infer/deepseekv3_obfuscate_inference.py @@ -12,43 +12,83 @@ # See the License for the specific language governing permissions and # limitations under the License. -from network_patch import deepseekv3_ms_network_obfuscate # Add this line on the top of script. -# from network_patch import qwen2_5_7b_instruct_ms_network_obfuscate -import time -import sys -import vllm_mindspore -from vllm import LLM, SamplingParams - -# Sample prompts. -prompts = [ - "I am", - "Today is", - "Llama is" -] - -def test_deepseekv3_obfuscate_inference(model_path): - # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, top_p=0.95, max_tokens=50) - - # Create a LLM - llm = LLM(model=model_path, tensor_parallel_size=2) # Path of the obfuscated weight files - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - start_time = time.perf_counter() - outputs = llm.generate(prompts, sampling_params) - end_time = time.perf_counter() - elapsed_time = end_time - start_time - # Print the outputs. - for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}. 
Generated text: {generated_text!r}") - print(f"Infernce time: {elapsed_time} seconds") - -if __name__ == '__main__': - if len(sys.argv) != 2: - print("Usage: python deepseekv3_obfuscate_inference.py ") - - sys.exit(1) - model_path = sys.argv[1] - test_deepseekv3_obfuscate_inference(model_path) \ No newline at end of file +import argparse + +import mindspore as ms +from mindspore import Model, Tensor +from mindspore.common import initializer + +from mindformers import MindFormerConfig +from mindformers import build_context +from mindformers.tools.logger import logger +from mindformers.trainer.utils import transform_and_load_checkpoint +from mindformers.core.parallel_config import build_parallel_config +from mindformers.models.llama.llama_tokenizer_fast import LlamaTokenizerFast + +from research.deepseek3.deepseek3_model_infer import InferenceDeepseekV3ForCausalLM +from research.deepseek3.deepseek3_config import DeepseekV3Config + +from network_patch import deepseekv3_ms_network_obfuscate + +def run_predict(args): + """Deepseek-V3/R1 predict""" + # inputs + input_questions = [args.input] + + # set model config + yaml_file = args.config + config = MindFormerConfig(yaml_file) + build_context(config) + build_parallel_config(config) + model_config = config.model.model_config + model_config.parallel_config = config.parallel_config + model_config.moe_config = config.moe_config + model_config = DeepseekV3Config(**model_config) + + # build tokenizer + tokenizer = LlamaTokenizerFast(config.processor.tokenizer.vocab_file, + config.processor.tokenizer.tokenizer_file, + unk_token=config.processor.tokenizer.unk_token, + bos_token=config.processor.tokenizer.bos_token, + eos_token=config.processor.tokenizer.eos_token, + fast_tokenizer=True) + tokenizer.pad_token = tokenizer.eos_token + + # build model from config + network = InferenceDeepseekV3ForCausalLM(model_config) + ms_model = Model(network) + if config.load_checkpoint: + logger.info("----------------Transform and load 
checkpoint----------------") + seq_length = model_config.seq_length + input_ids = Tensor(shape=(model_config.batch_size, seq_length), dtype=ms.int32, init=initializer.One()) + infer_data = network.prepare_inputs_for_predict_layout(input_ids) + transform_and_load_checkpoint(config, ms_model, network, infer_data, do_predict=True) + + inputs = tokenizer(input_questions, max_length=64, padding="max_length")["input_ids"] + outputs = network.generate(inputs, + max_length=1024, + do_sample=False, + top_k=5, + top_p=1, + max_new_tokens=128) + # print("outputs before decoding:", outputs) + answer = tokenizer.decode(outputs) + print("answer: ", answer) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--config', + type=str, + required=True, + help='YAML config files, such as' + '/home/ma-user/work/vllm-mindspore/install_depend_pkgs/mindformers-br_infer_boom/research/deepseek3/deepseek3_671b/predict_deepseek3_671b.yaml', + ) + parser.add_argument( + '--input', + type=str, + default="生抽和老抽的区别是什么?") + args_ = parser.parse_args() + + run_predict(args_) \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py index 21af387..40dfe1c 100644 --- a/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py +++ b/examples/model_protection/deepseekv3/infer/network_patch/deepseekv3_ms_network_obfuscate.py @@ -9,17 +9,17 @@ from research.deepseek3.deepseek3_model_infer import DeepseekV3Model, DeepseekV3 from vllm_mindspore.model_executor.models.mf_models.deepseekv3_weight_processor import DeepseekV3WeightProcessor from mindformers.modules.layers import FreqsMgr, SeqExtendMethod -import ObfuscateFreqsMgr +from .rope_patch.ObfuscateFreqsMgr import ObfuscateFreqsMgr _orig_init = DeepseekV3Model.__init__ def _patched_init(self, config, *args, 
**kwargs): _orig_init(self, config, *args, **kwargs) - self.token_p = Parameter(Tensor(np.arange(config.hidden_size), mstype.int32), + self.token_p = Parameter(Tensor(np.arange(129280), mstype.int32), name='p_inv', parallel_optimizer=False) - self.emb_p_inv = Parameter(Tensor(np.arange(config.vocab_size), mstype.int32), + self.emb_p_inv = Parameter(Tensor(np.arange(7168), mstype.int32), name='emb_p_inv', parallel_optimizer=False) - self.rope_p = Parameter(Tensor(np.arange(config.hidden_size), mstype.int32), + self.rope_p = Parameter(Tensor(np.arange(32), mstype.int32), name='rope_p', parallel_optimizer=False) self.permute = ops.Gather().set_device('CPU') @@ -39,55 +39,87 @@ def _patched_init(self, config, *args, **kwargs): DeepseekV3Model.__init__ = _patched_init -def _patched_construct(self, tokens: Tensor, batch_valid_length=None, batch_index=None, zactivate_len=None, - block_tables=None, slot_mapping=None): - """ - Forward of deepseekv3 model. - - Args: - tokens: the tokenized inputs with datatype int32 - batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental - prediction. Tensor of shape :math:`(batch_size,)`. Default None. - batch_index(Tensor): The generated batch index when use continuous batching in LLM serving. - Tensor of shape :math:`(batch_size,)`. Default None. - zactivate_len(Tensor): The slice length of KVCache when use dynamic shape infer. - Tensor of shape :math:`(seq_length,)`. Default None. - block_tables(Tensor[int64]): Store mapping tables for each sequence. - slot_mapping(Tensor[int32]): Store token cache physical slot index. 
- - Returns: - output: Tensor, the output of deepseekv3 decoderlayer - """ - # preprocess - bs, seq_len = self.shape(tokens) - mask = None - if self.use_past: - if self.is_first_iteration: - freqs_cis = self.freqs_mgr.prefill(bs, seq_len) - if not self.is_pynative: - mask = self.casual_mask.prefill() - else: - mask = self.casual_mask(tokens) - else: - freqs_cis = self.freqs_mgr.increment(batch_valid_length) - - else: - mask = self.casual_mask(tokens) - freqs_cis = self.freqs_mgr(seq_len) - - tokens = self.permute(self.token_p, tokens, 0) +def _patched_construct(self, tokens: Tensor, h=None, batch_valid_length=None, batch_index=None, zactivate_len=None, + block_tables=None, slot_mapping=None, position_ids=None, q_seq_lens=None, + attention_mask=None, attn_padding_idx=None, attn_unpadding_idx=None, ffn_padding_idx=None, + ffn_unpadding_idx=None, key_cache=None): + """ + Forward of deepseekv3 model. + + Args: + tokens: the tokenized inputs with datatype int32 + batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental + prediction. Tensor of shape :math:`(batch_size,)`. Default None. + batch_index(Tensor): The generated batch index when use continuous batching in LLM serving. + Tensor of shape :math:`(batch_size,)`. Default None. + zactivate_len(Tensor): The slice length of KVCache when use dynamic shape infer. + Tensor of shape :math:`(seq_length,)`. Default None. + block_tables(Tensor[int64]): Store mapping tables for each sequence. + slot_mapping(Tensor[int32]): Store token cache physical slot index. 
+ + Returns: + output: Tensor, the output of deepseekv3 decoderlayer + """ + # preprocess + # print("token_p: ", self.token_p) + # print("emb_p_inv: ", self.emb_p_inv) + # print("rope_p: ", self.rope_p) + tokens = self.permute(self.token_p, tokens, 0) + mask = attention_mask + if self.is_first_iteration: + freqs_cis = self.freqs_mgr.prefill() + else: + freqs_cis = self.freqs_mgr.chunk_with_decode(position_ids) + + if not self.pre_process and self.pipeline_parallel: + if h is None: + raise ValueError("when pipeline stage is not 0, h can not be None.") + else: h = self.cast(self.tok_embeddings(tokens), self.dtype) - h = self.reshape(h, (bs, seq_len, self.hidden_size)) - - for i in range(self.num_layers): + # h = self.cast(h, mstype.float16) + # h = self.recover(h, self.emb_p_inv, 1) + # h = self.cast(h, self.dtype) + # self.print("hidden state after embedding: ", h) + + # for splitting dual batch + split_input = None + split_bvl = None + split_bt = None + split_sm = None + split_qsl = None + + for i in range(self.num_layers): + key_cache_i = key_cache[i] if key_cache is not None else None + if (self.moe_config.first_k_dense_replace and i < self.moe_config.first_k_dense_replace) \ + or not (self.enable_micro_batch and self.is_first_iteration): h = self.layers[i](h, freqs_cis, mask, batch_valid_length=batch_valid_length, - block_tables=block_tables, slot_mapping=slot_mapping) - h = self.cast(h, mstype.float16) - h = self.recover(h, self.emb_p_inv, axis=1) - h = self.cast(h, self.dtype) - output = self.norm_out(h) - return output - + block_tables=block_tables, slot_mapping=slot_mapping, + q_seq_lens=q_seq_lens, attn_padding_idx=attn_padding_idx, + attn_unpadding_idx=attn_unpadding_idx, ffn_padding_idx=ffn_padding_idx, + ffn_unpadding_idx=ffn_unpadding_idx, key_cache=key_cache_i) + else: + # split dual batch in prefilling + if i == self.moe_config.first_k_dense_replace: + split_input, split_bvl, split_bt, split_sm, split_qsl = self._split_micro_batch_input(h, \ + 
batch_valid_length, block_tables, slot_mapping, q_seq_lens) + split_input = self.layers[i](split_input, freqs_cis, mask, batch_valid_length=split_bvl, + block_tables=split_bt, slot_mapping=split_sm, + q_seq_lens=split_qsl, attn_padding_idx=attn_padding_idx, + attn_unpadding_idx=attn_unpadding_idx, ffn_padding_idx=ffn_padding_idx, + ffn_unpadding_idx=ffn_unpadding_idx, key_cache=key_cache_i) + if i == self.num_layers - 1: + h = mint.concat((split_input[0], split_input[1]), dim=0) + + h = self.cast(h, mstype.float16) + h = self.recover(h, self.emb_p_inv, axis=1) + h = self.cast(h, self.dtype) + + if self.post_process: + h = self.norm_out(h) + # print("hidden states: ", h) + return h + + DeepseekV3Model.construct = _patched_construct @@ -118,92 +150,4 @@ def _patched_infer_convert_outer_weight(self, src_hf_dir, hf_weight_map): requires_grad=False) -DeepseekV3WeightProcessor.infer_convert_outer_weight = _patched_infer_convert_outer_weight - -# def _patched_attention_construct(self, x: Tensor, freqs_cis: Tuple[Tensor, Tensor], mask=None, batch_valid_length=None, -# block_tables=None, slot_mapping=None): -# """ Forward process of the DeepseekV3Attention. 
""" -# ori_dtype = x.dtype - -# if self.q_lora_rank == 0: -# bs, seq_len, _ = self.shape(x) -# q = self.q_proj(x) -# latent_kv_all = self.kv2l(x) -# latent_kv, k_pe = mint.split(latent_kv_all, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) -# else: -# if self.qkv_concat: -# qkv2l = self.qkv2l(x) -# q, latent_kv, k_pe = mint.split(qkv2l, [self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim], -# dim=-1) -# bs, seq_len, _ = self.shape(q) -# norm_q = self.lq_norm(q) -# q = self.l2q_proj(norm_q) -# else: -# q = self.q2l_proj(x) -# bs, seq_len, _ = self.shape(q) -# norm_q = self.lq_norm(q) -# q = self.l2q_proj(norm_q) -# latent_kv_all = self.kv2l(x) -# latent_kv, k_pe = mint.split(latent_kv_all, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) - -# q = self.reshape(q, (bs, seq_len, self.n_local_heads, self.q_head_dim)) -# q_nope, q_pe = mint.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) -# i_kv = self.lkv_norm(latent_kv) - -# k_pe = self.reshape(k_pe, (bs, seq_len, 1, self.qk_rope_head_dim)) - -# freqs_cos_q, freqs_sin_q, freqs_cos_k, freqs_sin_k, swap_mask = freqs_cis -# freqs_cis_q = (freqs_cos_q, freqs_sin_q, swap_mask) -# freqs_cis_k = (freqs_cos_k, freqs_sin_k, swap_mask) - -# q_pe, _ = self.apply_rotary_emb(q_pe, k_pe, freqs_cis_q, batch_valid_length) -# _, k_pe = self.apply_rotary_emb(q_pe, k_pe, freqs_cis_k, batch_valid_length) - -# q_pe = self.reshape(q_pe, (bs, seq_len, self.n_local_heads, self.qk_rope_head_dim)) -# k_pe = self.reshape(k_pe, (bs, seq_len, 1, self.qk_rope_head_dim)) - -# key_states_cache = self.kpe_concat((i_kv, k_pe.view(bs, seq_len, self.qk_rope_head_dim))) -# key_out = self.infer_attention.paged_attention_mgr(key_states_cache, slot_mapping) -# q_nope = ops.depend(q_nope, key_out) - -# if self.is_first_iteration: -# o_k_nope = self.lkv2kv_k_nope(i_kv) -# o_v = self.lkv2kv_v(i_kv) -# k_nope = self.reshape(o_k_nope, (bs, seq_len, self.n_local_heads, self.qk_nope_head_dim)) -# value_states = self.reshape(o_v, 
(bs, seq_len, self.n_local_heads, self.v_head_dim)) -# query_states = self.pe_concat((q_nope, q_pe)) -# k_pe = self.tile_kv(k_pe, (1, 1, self.n_local_heads, 1)) -# key_states = self.pe_concat((k_nope, k_pe)) -# value_states = self.pe_concat((value_states, k_pe)) - -# key_states = key_states.view(bs, seq_len, -1) -# value_states = value_states.view(bs, seq_len, -1) -# query_states = query_states.view(bs, seq_len, -1) - -# context_layer = self.infer_attention(query_states, key_states, value_states, batch_valid_length, -# block_tables, mask) - -# context_layer = context_layer.view(bs, seq_len, self.n_local_heads, self.q_head_dim) -# context_layer = self.dim_slice_4d(context_layer, (0, 0, 0, 0), (bs, seq_len, self.n_local_heads, -# self.v_head_dim)) -# attn_out = context_layer.view(bs, seq_len, self.n_local_heads * self.v_head_dim) -# output = self.wo(attn_out) -# output = self.cast(output, ori_dtype) -# return output - -# q_absorb = self.lkv2kv_k_nope.weight.view(self.n_local_heads, self.qk_nope_head_dim, self.kv_lora_rank) -# out_absorb = self.lkv2kv_v.weight.view(self.n_local_heads, self.v_head_dim, self.kv_lora_rank) -# q_nope = self.qabsorb_matmul(q_nope.transpose(0, 2, 1, 3), q_absorb).transpose(0, 2, 1, 3) -# query_states = self.pe_concat((q_nope, q_pe)) -# query_states = query_states.view(bs, seq_len, -1) -# key_states = key_states_cache -# context_layer = self.infer_attention(query_states, key_states, key_states, batch_valid_length, -# block_tables, attn_mask=mask) -# context_layer = context_layer.view(bs, seq_len, self.n_local_heads, -1).transpose(0, 2, 1, 3) -# attn_out = self.outabsorb_matmul(context_layer, out_absorb).transpose(0, 2, 1, 3) -# attn_out = attn_out.view(bs, seq_len, self.n_local_heads * self.v_head_dim) -# output = self.wo(attn_out) -# output = self.cast(output, ori_dtype) -# return output - -# DeepseekV3Attention.construct = _patched_attention_construct \ No newline at end of file +DeepseekV3WeightProcessor.infer_convert_outer_weight = 
_patched_infer_convert_outer_weight \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/infer/network_patch/rope_patch/ObfuscateFreqsMgr.py b/examples/model_protection/deepseekv3/infer/network_patch/rope_patch/ObfuscateFreqsMgr.py new file mode 100644 index 0000000..4a0cc69 --- /dev/null +++ b/examples/model_protection/deepseekv3/infer/network_patch/rope_patch/ObfuscateFreqsMgr.py @@ -0,0 +1,185 @@ +import numpy as np +import math +from enum import Enum + +from mindspore.ops import operations as P +from mindspore.common.initializer import initializer, Tensor, Normal +import mindspore.common.dtype as mstype +from mindspore.nn.cell import Cell +from mindformers.tools.utils import is_pynative + +from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation +from mindspore.context import ParallelMode + +from mindformers.modules.layers import _yarn_find_correction_dim, _yarn_find_correction_range, _yarn_get_mscale, _yarn_linear_ramp_mask, _check_llama3_scaling_factor, _check_yarn_scaling_factor, _check_linear_scaling_factor, SeqExtendMethod + +class ObfuscateFreqsMgr(Cell): + r"""freqs_cis manager.""" + + def __init__(self, + head_dim, + seq_length=None, + max_position_embedding=4096, + rotary_dtype=mstype.float16, + theta=10000, + scaling_factor=1.0, + extend_method=SeqExtendMethod.NONE.value, + parallel_config=None, + is_dynamic=False, + limit_not_apply_seq_pipe=False, + rope_p=None, + rope_permute=None): + super().__init__() + self.is_pynative = is_pynative() + if seq_length is not None and seq_length > max_position_embedding: + max_position_embedding = seq_length + if extend_method == SeqExtendMethod.NTK.value: + theta *= scaling_factor + freqs_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) # (head_dim // 2, ) + freqs = 1.0 / (theta ** (freqs_base / head_dim)) # (head_dim // 2, ) + mscale = 1.0 + if extend_method == SeqExtendMethod.LINEAR.value: + _check_linear_scaling_factor(scaling_factor) + 
factor = scaling_factor["factor"] + freqs /= factor + + if extend_method == SeqExtendMethod.YARN.value: + _check_yarn_scaling_factor(scaling_factor, max_position_embedding) + factor = scaling_factor["factor"] + beta_fast = scaling_factor["beta_fast"] + beta_slow = scaling_factor["beta_slow"] + base = theta + original_max_position_embeddings = scaling_factor["original_max_position_embeddings"] + mscale_all_dim = scaling_factor["mscale_all_dim"] + mscale_ = scaling_factor["mscale"] + + internal_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) + internal_freq = 1.0 / (factor * theta ** (internal_freq_base / head_dim)) + + extra_freq_base = np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(np.float32) + extra_freq = 1.0 / (theta ** (extra_freq_base / head_dim)) + + low, high = _yarn_find_correction_range(beta_fast, beta_slow, head_dim, base, + original_max_position_embeddings) + inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(low, high, head_dim // 2) + freqs = internal_freq * (1 - inv_freq_mask) + extra_freq * inv_freq_mask + mscale = float(_yarn_get_mscale(factor, mscale_) + / _yarn_get_mscale(factor, mscale_all_dim)) + + if extend_method == SeqExtendMethod.LLAMA3.value: + _check_llama3_scaling_factor(scaling_factor, max_position_embedding) + + factor = scaling_factor["factor"] + if factor is None or not isinstance(factor, float) or factor < 1.0: + raise ValueError(f"`scaling_factor`'s factor field must be a float >= 1, got {factor}") + + factor = scaling_factor["factor"] + low_freq_factor = scaling_factor["low_freq_factor"] + high_freq_factor = scaling_factor["high_freq_factor"] + old_context_len = scaling_factor["original_max_position_embeddings"] + + low_freq_wavelen = old_context_len / low_freq_factor + high_freq_wavelen = old_context_len / high_freq_factor + new_freqs = [] + for freq in freqs: + wavelen = 2 * math.pi / freq + if wavelen < high_freq_wavelen: + new_freqs.append(freq) + elif wavelen > low_freq_wavelen: + 
new_freqs.append(freq / factor) + else: + if low_freq_wavelen == high_freq_wavelen: + raise ValueError(f"low_freq_wavelen should not equal high_freq_wavelen, " + f"but low_freq_wavelen got {low_freq_wavelen}," + f"high_freq_wavelen got {high_freq_wavelen}.") + smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor) + new_freqs.append((1 - smooth) * freq / factor + smooth * freq) + freqs = np.array(new_freqs, dtype=freqs.dtype) + + if extend_method == SeqExtendMethod.PI.value: + t = np.arange(0, max_position_embedding / scaling_factor, 1 / scaling_factor).astype(np.float32) + else: + t = np.arange(0, max_position_embedding, 1).astype(np.float32) + + freqs = np.outer(t, freqs) # (max_position_embedding, head_dim // 2) + freqs = Tensor.from_numpy(freqs) + freqs = rope_permute(freqs, rope_p, axis=1) + freqs = freqs.asnumpy() + phase_shift = np.random.uniform(-0.1, 0.1, size=(head_dim // 2,)).astype(np.float32) + freqs = freqs + phase_shift + emb = np.concatenate((freqs, freqs), axis=-1) + freqs_cos = np.cos(emb) * mscale # (seq_len, head_dim) + freqs_sin = np.sin(emb) * mscale # (seq_len, head_dim) + swap_mask = ObfuscateFreqsMgr.get_swap_mask(head_dim) + + if parallel_config is not None and parallel_config.context_parallel > 1: + self.context_parallel = parallel_config.context_parallel + else: + self.context_parallel = 1 + self.head_dim = head_dim + self.is_dynamic = is_dynamic + self.freqs_cos = Tensor(freqs_cos, dtype=rotary_dtype) + self.freqs_sin = Tensor(freqs_sin, dtype=rotary_dtype) + self.swap_mask = Tensor(swap_mask, dtype=rotary_dtype) + + self.reshape = P.Reshape() + self.slice = P.StridedSlice() + self.gather = P.Gather() + self.tile = P.Tile() + if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL, ParallelMode.SEMI_AUTO_PARALLEL): + self.slice.shard(((1, 1),)) + self.gather.shard(((1, 1), (1,))) + self.tile.shard(((1, 1),)) + self.seq_pipe = parallel_config and parallel_config.seq_split_num and 
parallel_config.seq_split_num > 1 \ + and not limit_not_apply_seq_pipe + if self.seq_pipe: + self.seq_split_num = parallel_config.seq_split_num + self.seq_seg_len = seq_length // self.seq_split_num + np_range = np.arange(self.seq_seg_len) + self.seq_seg_range = Tensor(np_range, dtype=mstype.int32) + self.add_seq = P.Add() + + def construct(self, seq_length=None, seq_chunk=None): + """Get freqs_cos and freqs_sin""" + if self.seq_pipe: + seg_seq_range = self.add_seq(self.seq_seg_range, self.seq_seg_len * seq_chunk) + freqs_cos = self.gather(self.freqs_cos, seg_seq_range, 0) + freqs_sin = self.gather(self.freqs_sin, seg_seq_range, 0) + else: + freqs_cos = self.slice(self.freqs_cos, (0, 0), (seq_length, self.head_dim), (1, 1)) + freqs_sin = self.slice(self.freqs_sin, (0, 0), (seq_length, self.head_dim), (1, 1)) + freqs_cos = self.reshape(freqs_cos, (-1, 1, seq_length, self.head_dim)) + freqs_sin = self.reshape(freqs_sin, (-1, 1, seq_length, self.head_dim)) + return freqs_cos, freqs_sin, self.swap_mask + + def prefill(self, bs, seq_length): + if self.is_dynamic and not self.is_pynative: + return self.freqs_cos, self.freqs_sin, self.swap_mask + freqs_cos = self.tile(self.slice(self.freqs_cos, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) + freqs_sin = self.tile(self.slice(self.freqs_sin, (0, 0), (seq_length, self.head_dim), (1, 1)), (bs, 1)) + return freqs_cos, freqs_sin, self.swap_mask + + def increment(self, batch_valid_length): + indices = batch_valid_length - 1 + freqs_cos = self.gather(self.freqs_cos, indices, 0) + freqs_sin = self.gather(self.freqs_sin, indices, 0) + return freqs_cos, freqs_sin, self.swap_mask + + def increment_multi_ids(self, indices): + indices = indices.reshape(-1) + freqs_cos = self.gather(self.freqs_cos, indices, 0) + freqs_sin = self.gather(self.freqs_sin, indices, 0) + return freqs_cos, freqs_sin, self.swap_mask + + def chunk_with_decode(self, seq_range): + """Obtain the position encoding of chunks and increments""" + freqs_cos = 
self.gather(self.freqs_cos, seq_range, 0) + freqs_sin = self.gather(self.freqs_sin, seq_range, 0) + return freqs_cos, freqs_sin, self.swap_mask + + @staticmethod + def get_swap_mask(head_dim): + """Swap matrix""" + zero_block = np.zeros((head_dim // 2, head_dim // 2), dtype=np.float32) + id_block = np.identity(head_dim // 2, dtype=np.float32) + return np.block([[zero_block, id_block], [-id_block, zero_block]]) diff --git a/examples/model_protection/deepseekv3/infer/network_patch/rope_patch/__init__.py b/examples/model_protection/deepseekv3/infer/network_patch/rope_patch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_huggingface.yaml b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_huggingface.yaml new file mode 100644 index 0000000..c58762a --- /dev/null +++ b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_huggingface.yaml @@ -0,0 +1,320 @@ +obf_metadata_config: +- name: rope_p + shape: [32, ] + type: rearrange + save_metadata: True +- name: token_p + shape: [129280, ] + type: rearrange + save_metadata: True +- name: token_p_inv + shape: [129280, ] + type: rearrange + save_metadata: False +- name: emb_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: emb_p_inv + shape: [7168, ] + type: rearrange + save_metadata: True +- name: qa_p + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qa_p_inv + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qb_p + shape: [24576, ] + type: rearrange + save_metadata: False +- name: kva_p + shape: [576, ] + type: rearrange + save_metadata: False +- name: kva_p_nope + shape: [512, ] + type: rearrange + save_metadata: False +- name: kva_p_nope_inv + shape: [512, ] + type: rearrange + save_metadata: False +- name: kvb_p + shape: [32768, ] + type: rearrange + save_metadata: False +- name: vb_p_inv + shape: [16384, ] + type: 
rearrange + save_metadata: False +- name: o_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: o_p_inv + shape: [7168, ] + type: rearrange + save_metadata: False +- name: mlp_up_p + shape: [18432, ] + type: rearrange + save_metadata: False +- name: mlp_up_p_inv + shape: [18432, ] + type: rearrange + save_metadata: False +- name: mlp_down_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: expert_up_p + shape: [2048, ] + type: rearrange + save_metadata: False +- name: expert_up_p_inv + shape: [2048, ] + type: rearrange + save_metadata: False +- name: expert_down_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: shared_expert_up_p + shape: [2048, ] + type: rearrange + save_metadata: False +- name: shared_expert_up_p_inv + shape: [2048, ] + type: rearrange + save_metadata: False +- name: shared_expert_down_p + shape: [7168, ] + type: rearrange + save_metadata: False + +weight_obf_config: +- target: model/embed_tokens/weight + weight_obf_ops: + - name: permuate + input_x: weight + input_y: token_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/self_attn/q_a_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: qa_p + axis: 0 + +- target: model/layers/${layer}/self_attn/q_a_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p + axis: 0 + +- target: model/layers/${layer}/self_attn/q_b_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p + axis: 1 + - name: permuate + 
input_x: weight + input_y: qb_p + axis: 0 + +- target: model/layers/${layer}/self_attn/kv_a_proj_with_mqa/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: kva_p + axis: 0 + +- target: model/layers/${layer}/self_attn/kv_a_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope + axis: 0 + +- target: model/layers/${layer}/self_attn/kv_b_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope + axis: 1 + - name: permuate + input_x: weight + input_y: kvb_p + axis: 0 + +- target: model/layers/${layer}/self_attn/o_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: vb_p + axis: 1 + - name: permuate + input_x: weight + input_y: o_p + axis: 0 + +- target: model/layers/${layer}/mlp/gate_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/up_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/down_proj/weight + layers: [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_down_p + axis: 0 + +- target: model/layers/${layer}/input_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 0 + +- target: model/layers/${layer}/post_attention_layernorm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 0 + +- target: model/layers/${layer}/mlp/gate/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/mlp/shared_experts/gate_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/shared_experts/up_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/shared_experts/down_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_down_p + 
axis: 0 + +- target: model/layers/${layer}/mlp/experts/${expert}/gate_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/experts/${expert}/up_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 0 + +- target: model/layers/${layer}/mlp/experts/${expert}/down_proj/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 
177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_down_p + axis: 0 \ No newline at end of file diff --git a/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_ms.yaml b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_ms.yaml new file mode 100644 index 0000000..9fec173 --- /dev/null +++ b/examples/model_protection/deepseekv3/obfuscation/config/deepseekv3_obf_config_ms.yaml @@ -0,0 +1,320 @@ +obf_metadata_config: +- name: rope_p + shape: [32, ] + type: rearrange + save_metadata: True +- name: token_p + shape: [129280, ] + type: rearrange + save_metadata: True +- name: token_p_inv + shape: [129280, ] + type: rearrange + save_metadata: False +- name: emb_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: emb_p_inv + shape: [7168, ] + type: rearrange + save_metadata: True +- name: qa_p + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qa_p_inv + shape: [1536, ] + type: rearrange + save_metadata: False +- name: qb_p + shape: [24576, ] + type: rearrange + save_metadata: False +- name: kva_p + shape: [576, ] + type: rearrange + save_metadata: False +- name: kva_p_nope + shape: [512, ] + type: rearrange + save_metadata: False +- name: kva_p_nope_inv + shape: [512, ] + type: rearrange + save_metadata: False +- name: kvb_p + shape: [32768, ] + type: rearrange + save_metadata: False +- name: vb_p_inv + shape: [16384, ] + type: rearrange + save_metadata: False +- name: o_p + shape: [7168, ] + type: 
rearrange + save_metadata: False +- name: o_p_inv + shape: [7168, ] + type: rearrange + save_metadata: False +- name: mlp_up_p + shape: [18432, ] + type: rearrange + save_metadata: False +- name: mlp_up_p_inv + shape: [18432, ] + type: rearrange + save_metadata: False +- name: mlp_down_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: expert_up_p + shape: [2048, ] + type: rearrange + save_metadata: False +- name: expert_up_p_inv + shape: [2048, ] + type: rearrange + save_metadata: False +- name: expert_down_p + shape: [7168, ] + type: rearrange + save_metadata: False +- name: shared_expert_up_p + shape: [2048, ] + type: rearrange + save_metadata: False +- name: shared_expert_up_p_inv + shape: [2048, ] + type: rearrange + save_metadata: False +- name: shared_expert_down_p + shape: [7168, ] + type: rearrange + save_metadata: False + +weight_obf_config: +- target: model/tok_embeddings/embedding_weight + weight_obf_ops: + - name: permuate + input_x: weight + input_y: token_p_inv + axis: 0 + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/attention/q2l_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: qa_p + axis: 0 + +- target: model/layers/${layer}/attention/lq_norm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p + axis: 0 + +- target: model/layers/${layer}/attention/l2q_proj/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: qa_p + axis: 1 + - name: permuate + input_x: weight + input_y: qb_p + axis: 0 + +- target: 
model/layers/${layer}/attention/kv2l/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: kva_p + axis: 0 + +- target: model/layers/${layer}/attention/lkv_norm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope + axis: 0 + +- target: model/layers/${layer}/attention/lkv2kv/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: kva_p_nope + axis: 1 + - name: permuate + input_x: weight + input_y: kvb_p + axis: 0 + +- target: model/layers/${layer}/attention/wo/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: vb_p + axis: 1 + - name: permuate + input_x: weight + input_y: o_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/w1/weight + layers: [0, 1, 2] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/w3/weight + layers: [0, 1, 2] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/w2/weight + layers: [0, 1, 2] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: mlp_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: mlp_down_p + axis: 0 + +- target: model/layers/${layer}/attention_norm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 0 + +- target: model/layers/${layer}/ffn_norm/weight + layers: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/routed_experts/router/dense/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + +- target: model/layers/${layer}/feed_forward/shared_experts/w1/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/shared_experts/w3/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/shared_experts/w2/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: shared_expert_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: shared_expert_down_p + axis: 0 + +- target: model/layers/${layer}/feed_forward/routed_experts/ffn/w1/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + # experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 2 + +- target: model/layers/${layer}/feed_forward/routed_experts/ffn/w3/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + # experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 
143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: emb_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 2 + +- target: model/layers/${layer}/feed_forward/routed_experts/ffn/w2/weight + layers: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + # experts: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 
239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255] + weight_obf_ops: + - name: permuate + input_x: weight + input_y: expert_up_p + axis: 1 + - name: permuate + input_x: weight + input_y: expert_down_p + axis: 2 \ No newline at end of file diff --git a/mindarmour/model_protection/obfuscation.py b/mindarmour/model_protection/obfuscation.py index 0de023c..22a6e35 100644 --- a/mindarmour/model_protection/obfuscation.py +++ b/mindarmour/model_protection/obfuscation.py @@ -327,6 +327,8 @@ class ModelObfuscator: obf_param = obf_param[p] elif axis == 1: obf_param = obf_param[:, p] + elif axis == 2: + obf_param = obf_param[:, :, p] else: raise ValueError('axis should be 0 or 1, but got {}'.format(axis)) elif op_name == 'matmul': @@ -450,7 +452,7 @@ class ModelObfuscator: try: with safe_open(src_file, framework="np") as f: for param_name in f.keys(): - #print(hf_param_name) + # print(param_name) params[param_name] = f.get_tensor(param_name) index["weight_map"][param_name] = file_name not_obfuscated_params.append(param_name) @@ -472,3 +474,4 @@ class ModelObfuscator: save_file(params, obf_file_name) index["metadata"]["total_size"] += os.path.getsize(obf_file_name) return not_obfuscated_params + -- Gitee