diff --git a/aikg/benchmark/aikgbench/llm/common/Concat.py b/aikg/benchmark/aikgbench/llm/common/Concat.py new file mode 100644 index 0000000000000000000000000000000000000000..c9ce58e49bb5d23f0c2e84466a1461a42e2e8ea3 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Concat.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs concatenation of input tensors. + """ + + def __init__(self, concat_dim): + super(Model, self).__init__() + self.concat_dim = concat_dim + + def forward(self, x, y): + """ + Perform concatenation of two input tensors. + + Args: + x: First input tensor + y: Second input tensor + + Returns: + Concatenated tensor along the specified dimension + """ + return torch.cat((x, y), dim=self.concat_dim) + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with float16 data type as tested in test_concat.py + """ + # Use float16 test case + shape1 = (28, 32, 4096) + shape2 = (28, 64, 4096) + + # Generate random tensors similar to test_concat.py + input0 = torch.randn(shape1, dtype=torch.float16) + input1 = torch.randn(shape2, dtype=torch.float16) + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For concatenation, we need the concatenation dimension. + Default to 1 (second dimension) as in test_concat.py. + """ + return [1] # Default concatenation dimension (dim=1) diff --git a/aikg/benchmark/aikgbench/llm/common/Cumsum.py b/aikg/benchmark/aikgbench/llm/common/Cumsum.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8dce31ff825680dc9bd98fc4594ff20ddd6521 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Cumsum.py @@ -0,0 +1,58 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs cumulative sum operation. + """ + + def __init__(self, dim=1): + super(Model, self).__init__() + self.dim = dim + + def forward(self, x): + """ + Perform cumulative sum operation on input tensor. + + Args: + x: Input tensor + + Returns: + Cumulative sum tensor along the specified dimension + """ + return torch.cumsum(x, dim=self.dim) + + +# Model parameters - using the same parameters as in test_cumsum.py +# Default parameters for cumsum operation +default_params = { + 'dim': 1, # Default axis/dimension for cumsum + 'shape': (28, 32, 4096), # Default tensor shape + 'dtype': torch.float16 # Default data type +} + +# Supported axes for cumsum operation +supported_axes = [0, 1, 2] # For 3D tensor (28, 32, 4096) + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_cumsum.py + """ + # Use default parameters from test_cumsum.py + shape = default_params['shape'] + dtype = default_params['dtype'] + + # Generate random tensor similar to test_cumsum.py + input0 = torch.randn(shape, dtype=dtype) + return [input0] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For cumsum operation, we need the dimension/axis. 
+ """ + return [default_params['dim']] # Default dimension (axis=1) diff --git a/aikg/benchmark/aikgbench/llm/common/DynamicNTK.py b/aikg/benchmark/aikgbench/llm/common/DynamicNTK.py new file mode 100644 index 0000000000000000000000000000000000000000..033d5cc0fcfe851bed88fc021c2c5fbd28b186e8 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/DynamicNTK.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import math + + +class Model(nn.Module): + """ + Simple model that performs dynamic NTK (Neural Tangent Kernel) operation. + """ + + def __init__(self, output_type=1): + super(Model, self).__init__() + self.output_type = output_type + + def forward(self, position_ids, inv_freqs, seq_lens): + """ + Perform dynamic NTK operation to generate sinusoidal position embeddings. + + Args: + position_ids: Position IDs tensor [num_tokens] + inv_freqs: Inverse frequencies tensor [batch, dim/2] + seq_lens: Sequence lengths tensor [batch] + + Returns: + Tuple of (sin_output, cos_output) tensors + """ + off = 0 + num_tokens = position_ids.shape[0] + dim = inv_freqs.shape[1] * 2 + batch_num = seq_lens.shape[0] + + # Determine output type + otype = torch.float16 if self.output_type == 0 else torch.bfloat16 + + sin_out = torch.zeros([num_tokens, dim], dtype=torch.float32) + cos_out = torch.zeros([num_tokens, dim], dtype=torch.float32) + + for batch_id in range(batch_num): + pos_len = seq_lens[batch_id] + freqs = torch.einsum('i,j->ij', + position_ids[off:off + + pos_len].to(torch.float32), + inv_freqs[batch_id]) + emb = torch.cat((freqs, freqs), dim=-1) + cos_out[off:off + pos_len, :] = emb.cos() + sin_out[off:off + pos_len, :] = emb.sin() + off += pos_len + + return sin_out.to(otype), cos_out.to(otype) + + +# Model parameters - using the same parameters as in test_dynamic_ntk.py +# Default parameters for dynamic NTK operation +default_params = { + 'output_type': 1, # Default output type (1 for bfloat16, 0 for float16) + 'batch': 16, # Default batch size + 'num_tokens': 256, # Default number of tokens + 'dim': 128, # Default dimension + 'max_seq_len': 256000 # Default maximum sequence length +} + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_dynamic_ntk.py + """ + # Use default parameters from test_dynamic_ntk.py + batch = default_params['batch'] + num_tokens = default_params['num_tokens'] + dim = default_params['dim'] + max_seq_len = default_params['max_seq_len'] + + # Generate test data similar to test_dynamic_ntk.py + aux_array = torch.arange(0, dim, 2, dtype=torch.float32) / dim + batch_base = torch.randint(10000, 50000, [batch], dtype=torch.float32) + position_ids = torch.randint( + 0, max_seq_len, [num_tokens], dtype=torch.int32) + inv_freqs = torch.zeros([batch, int(dim / 2)], dtype=torch.float32) + + for i in range(batch): + inv_freqs[i, :] = 1.0 / batch_base[i] ** aux_array + + avg_seq_len = int(num_tokens / batch) + seq_lens = torch.ones([batch], dtype=torch.int32) * avg_seq_len + seq_lens[0] += num_tokens - avg_seq_len * batch + + return [position_ids, inv_freqs, seq_lens] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For dynamic NTK operation, we need the output type. 
+ """ + return [default_params['output_type']] # Default output type (1 for bfloat16) diff --git a/aikg/benchmark/aikgbench/llm/common/FastSoftMax.py b/aikg/benchmark/aikgbench/llm/common/FastSoftMax.py new file mode 100644 index 0000000000000000000000000000000000000000..947464f2f3fabe4cd8c41cc5c26bc3f7ae72bbf3 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/FastSoftMax.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs fast softmax operation. + """ + + def __init__(self, head_num=8, q_seq_len=None): + super(Model, self).__init__() + self.head_num = head_num + self.q_seq_len = q_seq_len if q_seq_len is not None else [ + 200] # Default sequence length + + def forward(self, x): + """ + Perform fast softmax operation on input tensor. + + Args: + x: Input tensor to be processed + + Returns: + Softmax output tensor + """ + golden = torch.empty_like(x) + start = 0 + + for i in range(len(self.q_seq_len)): + end = start + self.head_num * self.q_seq_len[i] * self.q_seq_len[i] + cur_data_input = x[start:end].reshape(-1, self.q_seq_len[i]) + cur_golden = torch.softmax(cur_data_input.to( + torch.float32), dim=-1).to(torch.float16) + golden[start:end] = cur_golden.reshape(-1) + start = end + + return golden + + +# Model parameters - using the same parameters as in test_fastsoftmax_operation.py +# Default parameters for fast softmax operation +default_params = { + 'head_num': 8, # Default number of heads + 'batch_size': 4, # Default batch size + 'seq_len_range': (100, 300) # Default sequence length range +} + +# Default sequence lengths for testing +default_seq_lens = [200, 150, 250, 180] # Example sequence lengths + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_fastsoftmax_operation.py + """ + # Use default parameters from test_fastsoftmax_operation.py + batch_size = default_params['batch_size'] + head_num = default_params['head_num'] + q_seq_len = default_seq_lens + + # Generate data_input_list based on q_seq_len + data_input_list = [] + for i in range(batch_size): + data_input = torch.randn( + head_num * q_seq_len[i] * q_seq_len[i]).to(torch.float16) + data_input_list.append(data_input) + + # Concatenate all inputs + data_input = torch.cat(data_input_list) + + return [data_input] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For fast softmax operation, we need head_num and q_seq_len. + """ + # Use default sequence lengths for initialization + return [default_params['head_num'], default_seq_lens] diff --git a/aikg/benchmark/aikgbench/llm/common/Fill.py b/aikg/benchmark/aikgbench/llm/common/Fill.py new file mode 100644 index 0000000000000000000000000000000000000000..772636148788f633a34ad1e6f9f8e1a604a0674b --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Fill.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs fill operation with mask. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, value): + """ + Perform fill operation on input tensor. 
+ + Args: + x: Input tensor to be filled + value: Value to fill with + + Returns: + Filled tensor + """ + return torch.full_like(x, value.item()) + + +# Model parameters - using the same parameters as in test_fill.py +# Default parameters for fill operation +default_params = { + 'with_mask': True, # Default to use mask + 'value': -10000, # Default fill value + 'shape': (5, 5), # Default tensor shape + 'dtype': torch.float16 # Default data type +} + +# Supported fill values for testing +supported_values = [-10000, 10000] + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_fill.py + """ + # Use default parameters from test_fill.py + shape = default_params['shape'] + dtype = default_params['dtype'] + + # Generate input tensor similar to test_fill.py + input0 = torch.rand(shape).to(dtype) + + # Generate fill value + input1 = torch.tensor([default_params['value']], dtype=dtype) + + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For fill operation, no parameters needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/common/Gather.py b/aikg/benchmark/aikgbench/llm/common/Gather.py new file mode 100644 index 0000000000000000000000000000000000000000..b3046eb9746746942e585f8bbed83677f34528be --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Gather.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs gather operation along a specified axis. + """ + + def __init__(self): + super(Model, self).__init__() + self.axis = 1 + + def forward(self, x, indices): + """ + Perform gather operation on input tensor using indices along the specified axis. + + Args: + x: Input tensor to gather from + indices: Indices tensor + + Returns: + Gathered tensor + """ + return torch.gather(x, dim=self.axis, index=indices) + + +# Model parameters - using the same parameters as in test_gather.py +# Default parameters for gather operation +default_params = { + 'axis': 1, # Default axis for gather + 'input_shape': (3, 5), # Default input tensor shape + 'indices_shape': (3, 4), # Default indices tensor shape + 'dtype': torch.float16, # Default data type + 'indices_dtype': torch.int64 # Default indices data type +} + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_gather.py + """ + # Use default parameters from test_gather.py + input_shape = default_params['input_shape'] + indices_shape = default_params['indices_shape'] + dtype = default_params['dtype'] + indices_dtype = default_params['indices_dtype'] + + # Generate input tensor + input0 = torch.randn(input_shape, dtype=dtype) + # Generate indices tensor (values in range [0, input_shape[axis])) + input1 = torch.randint( + 0, input_shape[1], indices_shape, dtype=indices_dtype) + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For gather operation, no parameters needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/common/IndexAdd.py b/aikg/benchmark/aikgbench/llm/common/IndexAdd.py new file mode 100644 index 0000000000000000000000000000000000000000..bd0916e8bd73632fbad9d46279e9773df23417eb --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/IndexAdd.py @@ -0,0 +1,62 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs index add operation. 
+ """ + + def __init__(self, index_type, axis): + super(Model, self).__init__() + self.index_type = index_type + self.axis = axis + + def forward(self, x, indices, values, alpha): + """ + Perform index add operation on input tensor. + + Args: + x: Input tensor to be modified + indices: Indices tensor + values: Values tensor to add + alpha: Alpha scaling factor + + Returns: + Modified tensor + """ + if self.index_type == 1: + cloned_x = x.clone() + cloned_x.index_add_(self.axis, indices, values, alpha=alpha.item()) + return cloned_x + + return x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on test_index_add.py + """ + axis = 0 + n, k = 1024, 4096 + num_indices = 90 + shape0 = (n, k) + shape1 = (num_indices,) + shape2 = (num_indices, k) + shape3 = (1,) + + input0 = torch.rand(shape0, dtype=torch.half) + input1 = torch.arange(num_indices, dtype=torch.int32) + input2 = torch.rand(shape2, dtype=torch.half) + input3 = torch.rand(shape3, dtype=torch.half) + + return [input0, input1, input2, input3] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on test_index_add.py parameters + """ + return [1, 0] # indexType=1, axis=0 diff --git a/aikg/benchmark/aikgbench/llm/common/KvCache.py b/aikg/benchmark/aikgbench/llm/common/KvCache.py new file mode 100644 index 0000000000000000000000000000000000000000..ddb2c73d4625a143553c9423628b95806ceeef6a --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/KvCache.py @@ -0,0 +1,79 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs KV cache update operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, newkv, layer_id, cache_in, token_offset, seqlen): + """ + Update KV cache with newkv according to token_offset and seqlen. + + Args: + newkv: [ntokens, hidden_size] + layer_id: [1] (int tensor) + cache_in: [layer, batch, max_seqlen, hidden_size] + token_offset: [batch] (int tensor) + seqlen: [batch] (int tensor) + + Returns: + cache_out: [layer, batch, max_seqlen, hidden_size] + """ + # Clone cache_in to cache_out + cache_out = cache_in.clone() + layer_id_val = layer_id.item() if layer_id.numel( + ) == 1 else int(layer_id[0].item()) + batch = seqlen.shape[0] + hidden_size = newkv.shape[1] + prefix_ntokens = 0 + for i in range(batch): + for j in range(seqlen[i].item()): + pos = token_offset[i].item() - seqlen[i].item() + j + cache_out[layer_id_val, i, pos, + :] = newkv[prefix_ntokens + j, :] + prefix_ntokens += seqlen[i].item() + return cache_out + + +# Default parameters for kv_cache operation +default_params = { + 'layer': 28, + 'layer_id': 0, + 'batch': 16, + 'max_seqlen': 384, + 'hidden_size': 1024 +} + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Returns tensors with different data types as tested in test_kv_cache_operation.py + """ + layer = default_params['layer'] + layer_id = default_params['layer_id'] + batch = default_params['batch'] + max_seqlen = default_params['max_seqlen'] + hidden_size = default_params['hidden_size'] + seqlen = torch.randint(1, max_seqlen // 2, (batch,), dtype=torch.int32) + token_offset = seqlen.clone() + ntokens = seqlen.sum().item() + newkv = (torch.rand(ntokens, hidden_size) - 0.5) * 10 # [-5, 5] + newkv = newkv.to(torch.float16) + cache_in = torch.zeros(layer, batch, max_seqlen, + hidden_size, dtype=torch.float16) + layer_id_tensor = torch.tensor([layer_id], dtype=torch.int32) + return [newkv, layer_id_tensor, cache_in, token_offset, seqlen] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For kv_cache operation, no extra init params needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/common/Nonzero.py b/aikg/benchmark/aikgbench/llm/common/Nonzero.py new file mode 100644 index 0000000000000000000000000000000000000000..4b98483360f07614dc1cc6f835342455b422f477 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Nonzero.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs nonzero operation with padding. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x): + """ + Find nonzero indices and pad the result to match the input size. + + Args: + x: Input tensor + + Returns: + result: Padded nonzero indices tensor + num_non_negative: Number of nonzero elements + """ + num_non_negative = torch.count_nonzero(x) + padding_num = x.numel() - num_non_negative + # nonzero as tuple of indices, stack to shape [ndim, num_non_negative] + result = torch.stack(list(torch.nonzero(x, as_tuple=True))) + # pad with zeros to shape [ndim, numel] + if padding_num > 0: + padding = torch.zeros( + (x.shape[0], padding_num), dtype=result.dtype, device=result.device) + result = torch.cat((result, padding), dim=-1).long() + else: + result = result.long() + return result, torch.tensor(num_non_negative).long() + + +def get_inputs(): + """ + Generate random input tensor for testing. + """ + input0 = torch.randint(0, 2, (2, 490), dtype=torch.int64) + return [input0] + + +def get_init_inputs(): + return [] diff --git a/aikg/benchmark/aikgbench/llm/common/Onehot.py b/aikg/benchmark/aikgbench/llm/common/Onehot.py new file mode 100644 index 0000000000000000000000000000000000000000..9a698691e4ade075d6ad7a76d7dd428df7a1d178 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Onehot.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs onehot operation. + """ + + def __init__(self, axis=-1, depth=10): + super(Model, self).__init__() + self.axis = axis + self.depth = depth + + def forward(self, x): + """ + Perform onehot encoding on input tensor. 
+
+        Args:
+            x: Input tensor (indices)
+
+        Returns:
+            Onehot encoded tensor
+        """
+        # PyTorch one_hot always puts the new axis at the end, so we may need to permute
+        onehot = torch.nn.functional.one_hot(x, num_classes=self.depth)
+        if self.axis != -1 and self.axis != x.dim():
+            # Move the new onehot axis to the desired position
+            dims = list(range(onehot.dim()))
+            new_axis = self.axis if self.axis >= 0 else x.dim() + 1 + self.axis
+            dims = dims[:-1]
+            dims.insert(new_axis, onehot.dim() - 1)
+            onehot = onehot.permute(*dims)
+        return onehot
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    shape0 = (16,)
+    input0 = torch.randint(0, 10, shape0, dtype=torch.int64)
+    return [input0]
+
+
+def get_init_inputs():
+    return [-1, 10]
diff --git a/aikg/benchmark/aikgbench/llm/common/Pad.py b/aikg/benchmark/aikgbench/llm/common/Pad.py
new file mode 100644
index 0000000000000000000000000000000000000000..9fa2f51e6b46dec36ff3f1d7508de8aac429a230
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/common/Pad.py
@@ -0,0 +1,70 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs pad operation (sequence gather from padded output).
+    """
+
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, tmp_out, padding_offset, seq_len, input_ids):
+        """
+        Gather the last valid output for each sequence in the batch.
+
+        Args:
+            tmp_out: [token_num, hidden_dim] tensor
+            padding_offset: [1, token_num] tensor
+            seq_len: [batch, 1] tensor
+            input_ids: [batch, total_length] tensor
+
+        Returns:
+            out: [batch, hidden_dim] tensor
+        """
+        batch = input_ids.shape[0]
+        hidden_dim = tmp_out.shape[1]
+        out = torch.zeros((batch, hidden_dim),
+                          dtype=tmp_out.dtype, device=tmp_out.device)
+        temp_val = 0
+        for i in range(batch):
+            temp_val = temp_val + seq_len[i][0].item()
+            out[i] = tmp_out[temp_val - 1].cpu()
+        return out
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing (pure PyTorch version).
+    """
+    batch = 32
+    total_length = 64
+    hidden_dim = 4096
+    seq_len = torch.randint(1, total_length, (batch,), dtype=torch.int32)
+    input_ids = torch.zeros((batch, total_length), dtype=torch.long)
+    token_num = seq_len.sum().item()
+    tmp_out = (torch.rand(token_num, hidden_dim) *
+               2 - 1).to(torch.float16)  # [-1, 1]
+    # Build input_ids
+    for i in range(batch):
+        input_ids[i, :seq_len[i]] = torch.randint(
+            1, 50, (seq_len[i],), dtype=torch.long)
+    # Build padding_offset
+    zeros_num = total_length - seq_len
+    cum_offsets_now = torch.cumsum(zeros_num, dim=0)
+    padding_offset = []
+    for i in range(batch):
+        if i == 0:
+            padding_offset += [0] * seq_len[0].item()
+        else:
+            padding_offset += [cum_offsets_now[i - 1].item()] * \
+                seq_len[i].item()
+    padding_offset = torch.tensor(
+        padding_offset, dtype=torch.int32).reshape(1, token_num)
+    seq_len = seq_len.reshape(batch, 1)
+    return [tmp_out, padding_offset, seq_len, input_ids]
+
+
+def get_init_inputs():
+    return []
diff --git a/aikg/benchmark/aikgbench/llm/common/Slice.py b/aikg/benchmark/aikgbench/llm/common/Slice.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cac14f56fc049b15b4783cf31cad0dbbc1caecf
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/common/Slice.py
@@ -0,0 +1,40 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs slice operation.
+    """
+
+    def __init__(self, offsets, size):
+        super(Model, self).__init__()
+        self.offsets = offsets
+        self.size = size
+
+    def forward(self, x):
+        """
+        Slice the input tensor according to offsets and size.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Sliced tensor
+        """
+        return x[self.offsets[0]:self.offsets[0]+self.size[0],
+                 self.offsets[1]:self.offsets[1]+self.size[1]]
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    return [torch.randn(32, 128).half()]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [[2, 8], [10, 100]]
diff --git a/aikg/benchmark/aikgbench/llm/common/Sort.py b/aikg/benchmark/aikgbench/llm/common/Sort.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b385eb4f3b1404df31f3dd2bff9d69f93fd3eca
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/common/Sort.py
@@ -0,0 +1,40 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs sort operation (top-k).
+    """
+
+    def __init__(self, num):
+        super(Model, self).__init__()
+        self.num = num
+
+    def forward(self, x):
+        """
+        Perform top-k operation on the input tensor.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            values: Top-k values
+            indices: Top-k indices
+        """
+        values, indices = torch.topk(x, k=self.num, largest=True)
+        return values, indices.int()
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    return [torch.randint(-65504, 65504, (10, 22, 4096)).float().half()]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [1000]  # Reduced to 1000 so k does not exceed the last dimension
diff --git a/aikg/benchmark/aikgbench/llm/common/Split.py b/aikg/benchmark/aikgbench/llm/common/Split.py
new file mode 100644
index 0000000000000000000000000000000000000000..c38c52a96ad1413ae5ce7283ffb2857ea7d8c9ef
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/common/Split.py
@@ -0,0 +1,39 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs split operation.
+    """
+
+    def __init__(self, split_dim, split_num):
+        super(Model, self).__init__()
+        self.split_dim = split_dim
+        self.split_num = split_num
+
+    def forward(self, x):
+        """
+        Split the input tensor into chunks.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            List of split tensors
+        """
+        return torch.chunk(x, chunks=self.split_num, dim=self.split_dim)
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    return [torch.rand(4096, 22016).half()]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [-1, 2]
diff --git a/aikg/benchmark/aikgbench/llm/common/Transpose.py b/aikg/benchmark/aikgbench/llm/common/Transpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..259de3398b83c8c481ad4fdc381005bd77eec71a
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/common/Transpose.py
@@ -0,0 +1,38 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs transpose operation.
+    """
+
+    def __init__(self, perm):
+        super(Model, self).__init__()
+        self.perm = perm
+
+    def forward(self, x):
+        """
+        Transpose the input tensor according to perm.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Transposed tensor
+        """
+        return x.permute(*self.perm)
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    return [torch.randn(32, 128).half()]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+ """ + return [[1, 0]] diff --git a/aikg/benchmark/aikgbench/llm/common/Unpad.py b/aikg/benchmark/aikgbench/llm/common/Unpad.py new file mode 100644 index 0000000000000000000000000000000000000000..221decdd790b53099561f79775634e8981a6f546 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/Unpad.py @@ -0,0 +1,111 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs unpad operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_ids, cum_offsets_now, token_num, seq_len): + """ + Remove padding from input_ids and generate corresponding offsets. + + Args: + input_ids: [batch, total_length] tensor + cum_offsets_now: [batch, 1] tensor + token_num: [1, 1] tensor + seq_len: [batch, 1] tensor + + Returns: + x_remove_padding: [1, batch * total_length] tensor + cum_offsets_out: [batch, 1] tensor + padding_offset: [1, batch * total_length] tensor + """ + batch = input_ids.shape[0] + total_length_imm = input_ids.shape[1] + + # Remove padding from input_ids + x_remove_padding = input_ids[0, :seq_len[0]] + for i in range(1, batch): + x_remove_padding = torch.cat( + [x_remove_padding, input_ids[i, :seq_len[i]]]) + + # Pad to full length + target_length = batch * total_length_imm + current_length = x_remove_padding.shape[0] + if current_length < target_length: + padding_size = target_length - current_length + padding = torch.zeros( + padding_size, dtype=x_remove_padding.dtype, device=x_remove_padding.device) + x_remove_padding = torch.cat([x_remove_padding, padding]) + + x_remove_padding = x_remove_padding.reshape( + 1, batch * total_length_imm) + + # Generate cum_offsets_out + cum_offsets_out = torch.zeros( + batch, 1, dtype=torch.int32, device=input_ids.device) + for i in range(1, batch): + cum_offsets_out[i] = cum_offsets_now[i - 1] + + # Generate padding_offset + padding_offset = [] + for i in range(batch): + if i == 0: + padding_offset += [0] * seq_len[0].item() + else: + padding_offset += [cum_offsets_now[i - 1] + [0].item()] * seq_len[i].item() + + # Add zeros for padding + zero_offset = torch.zeros(1, batch * total_length_imm - token_num[0][0].item(), + dtype=torch.int32, device=input_ids.device) + padding_offset = torch.tensor( + padding_offset, dtype=torch.int32, device=input_ids.device) + padding_offset = torch.cat([padding_offset, zero_offset.flatten()]) + padding_offset = padding_offset.reshape(1, batch * total_length_imm) + + return x_remove_padding.long(), cum_offsets_out, padding_offset + + +def get_inputs(): + """ + Generate random input tensors for testing (pure PyTorch version). + """ + batch = 32 + total_length_imm = 64 + + # Generate seq_len + seq_len = torch.randint(1, total_length_imm + 1, + (batch,), dtype=torch.int32) + + # Generate input_ids + input_ids = torch.zeros((batch, total_length_imm), dtype=torch.int32) + for i in range(batch): + input_ids[i, :seq_len[i]] = torch.randint( + 1, 50, (seq_len[i],), dtype=torch.int32) + + # Generate cum_offsets_now + zeros_num = total_length_imm - seq_len + cum_offsets_now = torch.cumsum(zeros_num, dim=0) + + # Generate token_num + token_num = seq_len.sum() + + # Reshape tensors + cum_offsets_now = cum_offsets_now.reshape(batch, 1) + token_num = token_num.reshape(1, 1) + seq_len = seq_len.reshape(batch, 1) + + return [input_ids.long(), cum_offsets_now, token_num, seq_len] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. 
+ """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/common/View.py b/aikg/benchmark/aikgbench/llm/common/View.py new file mode 100644 index 0000000000000000000000000000000000000000..5a525253f25d7548ae78d7b3d943c4766889c83e --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/common/View.py @@ -0,0 +1,39 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs view operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x): + """ + Perform view operation on input tensor. + + Args: + x: Input tensor + + Returns: + Reshaped tensor + """ + # Reshape to 2D + return x.view(2, -1) + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + x = torch.rand(2, 64, dtype=torch.float16) + return [x] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseAdd.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseAdd.py new file mode 100644 index 0000000000000000000000000000000000000000..9c81e3403bc73236c1611907916f3be144972724 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseAdd.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs element-wise addition between two tensors. + """ + + def __init__(self): + super(Model, self).__init__() + # For element-wise addition, we don't need learnable parameters + # The addition will be performed between two input tensors + + def forward(self, x, y): + """ + Perform element-wise addition between two tensors. + + Args: + x: First input tensor + y: Second input tensor + + Returns: + Element-wise sum of x and y + """ + return torch.add(x, y) + + +# Model parameters - using the same shape as in test_add.py +shape = (1000000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_add.py + """ + # Generate random tensors similar to test_add.py + input0 = torch.rand(shape) + input1 = torch.rand(shape) + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For element-wise addition, no specific initialization parameters are needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseCast.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseCast.py new file mode 100644 index 0000000000000000000000000000000000000000..50676730dfc1b18cf52710af95ba77fd709d0d08 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseCast.py @@ -0,0 +1,48 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs data type casting on input tensor. + """ + + def __init__(self, output_dtype=torch.float32): + super(Model, self).__init__() + self.output_dtype = output_dtype + + def forward(self, x): + """ + Perform data type casting on input tensor. + + Args: + x: Input tensor + + Returns: + Input tensor cast to the specified output data type + """ + return x.type(self.output_dtype) + + +# Model parameters - using the same shape as in test_cast.py +shape = (10000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Returns tensors with different data types as tested in test_cast.py + """ + # Generate random tensors similar to test_cast.py (range [-5, 5]) + input0 = torch.rand(shape) * 10 - 5 + return [input0] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For data type casting, we need the target output data type. + Default to float16. + """ + return [torch.float16] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseEqual.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseEqual.py new file mode 100644 index 0000000000000000000000000000000000000000..b3d11ff593d1bf4dcf4829f9859019f71e319e62 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseEqual.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs element-wise equality comparison between two tensors. + """ + + def __init__(self): + super(Model, self).__init__() + # For element-wise equality comparison, we don't need learnable parameters + + def forward(self, x, y): + """ + Perform element-wise equality comparison between two tensors. + + Args: + x: First input tensor + y: Second input tensor + + Returns: + Boolean tensor indicating element-wise equality (converted to int8) + """ + return torch.eq(x, y).int().to(torch.int8) + + +# Model parameters - using the same shape as in test_equal.py +shape = (8, 6) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_equal.py + """ + # Generate random tensors similar to test_equal.py (range [0, 100]) + input0 = torch.rand(shape) * 100 + input1 = torch.rand(shape) * 100 + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For element-wise equality comparison, no specific initialization parameters are needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseMul.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseMul.py new file mode 100644 index 0000000000000000000000000000000000000000..7d87922f1d0965fc0595bf2b023ed0107004dd3f --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseMul.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs element-wise multiplication between two tensors. + """ + + def __init__(self): + super(Model, self).__init__() + # For element-wise multiplication, we don't need learnable parameters + # The multiplication will be performed between two input tensors + + def forward(self, x, y): + """ + Perform element-wise multiplication between two tensors. + + Args: + x: First input tensor + y: Second input tensor + + Returns: + Element-wise product of x and y + """ + return torch.mul(x, y) + + +# Model parameters - using the same shape as in test_mul.py +shape = (1000000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_mul.py + """ + # Generate random tensors similar to test_mul.py (range [0, 100]) + input0 = torch.rand(shape) * 100 + input1 = torch.rand(shape) * 100 + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For element-wise multiplication, no specific initialization parameters are needed. 
+ """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseMuls.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseMuls.py new file mode 100644 index 0000000000000000000000000000000000000000..7c732a6e46292c72106c99ab9ae9dec3f7aef721 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseMuls.py @@ -0,0 +1,48 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs element-wise scalar multiplication on input tensor. + """ + + def __init__(self, scalar_value=2): + super(Model, self).__init__() + self.scalar_value = scalar_value + + def forward(self, x): + """ + Perform element-wise scalar multiplication on input tensor. + + Args: + x: Input tensor + + Returns: + Element-wise product of input tensor and scalar value + """ + return torch.mul(x, self.scalar_value) + + +# Model parameters - using the same shape as in test_muls.py +shape = (1000000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 +scalar_value = 2 # From test_muls.py OP_PARAM_MULS["varAttr"] + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_muls.py + """ + # Generate random tensors similar to test_muls.py (range [0, 100]) + input0 = torch.rand(shape) * 100 + return [input0] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For element-wise scalar multiplication, we need the scalar value. + """ + return [scalar_value] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseRealDiv.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseRealDiv.py new file mode 100644 index 0000000000000000000000000000000000000000..28828f5f29bf2b6b08dc3791e65b349fc18f88b0 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseRealDiv.py @@ -0,0 +1,51 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs element-wise real division between two tensors. + """ + + def __init__(self): + super(Model, self).__init__() + # For element-wise division, we don't need learnable parameters + + def forward(self, x, y): + """ + Perform element-wise real division between two tensors. + + Args: + x: First input tensor (dividend) + y: Second input tensor (divisor) + + Returns: + Element-wise quotient of x divided by y + """ + return torch.div(x, y) + + +# Model parameters - using the same shape as in test_real_div.py +shape = (1000000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_real_div.py + """ + # Generate random tensors similar to test_real_div.py (range [0, 100]) + input0 = torch.rand(shape) * 100 + input1 = torch.rand(shape) * 100 + # Avoid division by zero by replacing zeros with small values + input1 = torch.where(input1 == 0, torch.tensor(2**(-4)), input1) + return [input0, input1] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For element-wise division, no specific initialization parameters are needed. 
+ """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/ElewiseTanh.py b/aikg/benchmark/aikgbench/llm/elewise/ElewiseTanh.py new file mode 100644 index 0000000000000000000000000000000000000000..d5b978c46d49be8bfa9f1dc1a4d9938063a95a48 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/ElewiseTanh.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs tanh activation function on input tensor. + """ + + def __init__(self): + super(Model, self).__init__() + # For tanh activation, we don't need learnable parameters + + def forward(self, x): + """ + Apply tanh activation function to input tensor. + + Args: + x: Input tensor + + Returns: + Tanh activation of input tensor + """ + return torch.tanh(x) + + +# Model parameters - using the same shape as in test_tanh.py +shape = (1000000,) +batch_size = 1 # For element-wise operations, batch size is typically 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_tanh.py + """ + # Generate random tensors similar to test_tanh.py (range [-100, 100]) + input0 = torch.rand(shape) * 200 - 100 + return [input0] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For tanh activation, no specific initialization parameters are needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/elewise/FastSoftMax.py b/aikg/benchmark/aikgbench/llm/elewise/FastSoftMax.py new file mode 100644 index 0000000000000000000000000000000000000000..7a51516fefccf5cb3d9bec9ba1a4f32cc48b4de5 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/elewise/FastSoftMax.py @@ -0,0 +1,82 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs fast softmax operation. + Based on test_fastsoftmax_operation.py from opstest/python/operations/fast_soft_max/ + """ + + def __init__(self, head_num=8, q_seq_len=None): + super(Model, self).__init__() + self.head_num = head_num + self.q_seq_len = q_seq_len if q_seq_len is not None else [ + 200] # Default sequence length + + def forward(self, x): + """ + Perform fast softmax operation on input tensor. + + Args: + x: Input tensor to be processed + + Returns: + Softmax output tensor + """ + golden = torch.empty_like(x) + start = 0 + + for i in range(len(self.q_seq_len)): + end = start + self.head_num * self.q_seq_len[i] * self.q_seq_len[i] + cur_data_input = x[start:end].reshape(-1, self.q_seq_len[i]) + cur_golden = torch.softmax(cur_data_input.to( + torch.float32), dim=-1).to(torch.float16) + golden[start:end] = cur_golden.reshape(-1) + start = end + + return golden + + +# Model parameters - using the same parameters as in test_fastsoftmax_operation.py +# Default parameters for fast softmax operation +default_params = { + 'head_num': 8, # Default number of heads + 'batch_size': 4, # Default batch size + 'seq_len_range': (100, 300) # Default sequence length range +} + +# Default sequence lengths for testing +default_seq_lens = [200, 150, 250, 180] # Example sequence lengths + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+    Returns tensors with different data types as tested in test_fastsoftmax_operation.py
+    """
+    # Use default parameters from test_fastsoftmax_operation.py
+    batch_size = default_params['batch_size']
+    head_num = default_params['head_num']
+    q_seq_len = default_seq_lens
+
+    # Generate data_input_list based on q_seq_len
+    data_input_list = []
+    for i in range(batch_size):
+        data_input = torch.randn(
+            head_num * q_seq_len[i] * q_seq_len[i]).to(torch.float16)
+        data_input_list.append(data_input)
+
+    # Concatenate all inputs
+    data_input = torch.cat(data_input_list)
+
+    return [data_input]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    For fast softmax operation, we need head_num and q_seq_len.
+    """
+    # Use default sequence lengths for initialization
+    return [default_params['head_num'], default_seq_lens]
diff --git a/aikg/benchmark/aikgbench/llm/index/DynamicQuantUpdateScatter.py b/aikg/benchmark/aikgbench/llm/index/DynamicQuantUpdateScatter.py
new file mode 100644
index 0000000000000000000000000000000000000000..924cb31876b2807e5825e9777f1fee28ffaf0f65
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/index/DynamicQuantUpdateScatter.py
@@ -0,0 +1,81 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs DynamicQuantUpdateScatter operation.
+    """
+
+    def __init__(self, reduce="add", axis=0):
+        super(Model, self).__init__()
+        self.reduce = reduce
+        self.axis = axis
+
+    def forward(self, var, var_scale, indices, updates, smooth_scales):
+        """
+        Perform DynamicQuantUpdateScatter operation.
+
+        Args:
+            var: Quantized variable tensor (int8)
+            var_scale: Variable scale tensor (float32)
+            indices: Indices tensor
+            updates: Updates tensor (float16/bfloat16)
+            smooth_scales: Smooth scales tensor (float16/bfloat16)
+
+        Returns:
+            Tuple of (y, var, var_scale) tensors
+        """
+        # Dequantize var using var_scale
+        var_dequantized = var.float() * var_scale
+
+        # Apply smooth scales to updates
+        scaled_updates = updates * smooth_scales
+
+        # Create a copy of var_dequantized for scatter operation
+        output = var_dequantized.clone()
+
+        # Ensure data type compatibility
+        output = output.to(torch.float32)
+        scaled_updates = scaled_updates.to(torch.float32)
+
+        # Perform scatter operation based on reduce mode
+        if self.reduce == "add":
+            output.scatter_add_(self.axis, indices, scaled_updates)
+        else:
+            output.scatter_(self.axis, indices, scaled_updates)
+
+        # Re-quantize the result
+        # Find the scale for the updated tensor
+        max_val = torch.abs(output).max()
+        new_scale = max_val / 127.0  # Assuming int8 quantization
+
+        # Quantize to int8
+        y = torch.clamp(torch.round(output / new_scale), -
+                        128, 127).to(torch.int8)
+
+        return y, output, new_scale.unsqueeze(0)
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    # Create tensors with appropriate shapes and types
+    var_shape = [4, 4]
+    indices_shape = [2, 4]
+
+    var = torch.randint(-128, 127, var_shape, dtype=torch.int8)
+    var_scale = torch.randn(1, dtype=torch.float32)
+    indices = torch.randint(0, 4, indices_shape, dtype=torch.int64)  # changed to int64
+    updates = torch.randn(indices_shape, dtype=torch.float16)
+    smooth_scales = torch.randn(indices_shape, dtype=torch.float16)
+
+    return [var, var_scale, indices, updates, smooth_scales]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+ """ + return ["add", 0] # reduce="add", axis=0 diff --git a/aikg/benchmark/aikgbench/llm/index/EmbeddingBag.py b/aikg/benchmark/aikgbench/llm/index/EmbeddingBag.py new file mode 100644 index 0000000000000000000000000000000000000000..1005651f07ef11b998106a950622a10f3b43431f --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/EmbeddingBag.py @@ -0,0 +1,70 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs EmbeddingBag operation. + """ + + def __init__(self, scale_grad_by_freq=False, mode='sum', sparse=False, include_last_offset=False, padding_idx=-1): + super(Model, self).__init__() + self.scale_grad_by_freq = scale_grad_by_freq + self.mode = mode + self.sparse = sparse + self.include_last_offset = include_last_offset + self.padding_idx = padding_idx + + def forward(self, weight, indices, offsets, per_sample_weights=None): + """ + Perform EmbeddingBag operation. + + Args: + weight: Embedding weight tensor + indices: Indices tensor + offsets: Offsets tensor + per_sample_weights: Per-sample weights tensor (optional) + + Returns: + Embedding bag output tensor + """ + # Use torch.nn.functional.embedding_bag for the operation + output = torch.nn.functional.embedding_bag( + indices, + weight, + offsets=offsets, + max_norm=None, + norm_type=2, + scale_grad_by_freq=self.scale_grad_by_freq, + mode=self.mode, + sparse=self.sparse, + per_sample_weights=per_sample_weights, + include_last_offset=self.include_last_offset, + padding_idx=self.padding_idx, + ) + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from gen_data.py: weight = np.random.randn(9).reshape(3, 3), indices = np.random.randint(0, 3, size=6) + num_weights = 3 + weight_shape = [3, 3] + indices_shape = [6] + + weight = torch.randn(weight_shape, dtype=torch.float32) + indices = torch.randint(0, num_weights, indices_shape, dtype=torch.int64) + offsets = torch.tensor([0, 2, 4, 5], dtype=torch.int64) + per_sample_weights = torch.ones(indices_shape, dtype=torch.float32) + + return [weight, indices, offsets, per_sample_weights] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [False, 'sum', False, False, 1] # scale_grad_by_freq=False, mode='sum', sparse=False, include_last_offset=False, padding_idx=1 diff --git a/aikg/benchmark/aikgbench/llm/index/EmbeddingDenseGradV2.py b/aikg/benchmark/aikgbench/llm/index/EmbeddingDenseGradV2.py new file mode 100644 index 0000000000000000000000000000000000000000..65429ccd10388a99cec4775ccaab5100b00b8534 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/EmbeddingDenseGradV2.py @@ -0,0 +1,73 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs EmbeddingDenseGradV2 operation. + """ + + def __init__(self, num_weights=4, padding_idx=0, scale_grad_by_freq=False): + super(Model, self).__init__() + self.num_weights = num_weights + self.padding_idx = padding_idx + self.scale_grad_by_freq = scale_grad_by_freq + + def forward(self, grad, sort_indices, pos_idx, num_weights_tensor=None, padding_idx_tensor=None, scale_grad_by_freq_tensor=None): + """ + Perform EmbeddingDenseGradV2 operation. 
+ + Args: + grad: Gradient tensor + sort_indices: Sorted indices tensor + pos_idx: Position indices tensor + num_weights_tensor: Number of weights tensor (optional) + padding_idx_tensor: Padding index tensor (optional) + scale_grad_by_freq_tensor: Scale gradient by frequency tensor (optional) + + Returns: + Embedding gradient tensor + """ + # Use provided tensors or fall back to instance variables + num_weights = num_weights_tensor.item( + ) if num_weights_tensor is not None else self.num_weights + padding_idx = padding_idx_tensor.item( + ) if padding_idx_tensor is not None else self.padding_idx + scale_grad_by_freq = scale_grad_by_freq_tensor.item( + ) if scale_grad_by_freq_tensor is not None else self.scale_grad_by_freq + + # Use torch.ops.aten.embedding_dense_backward for the operation + result = torch.ops.aten.embedding_dense_backward( + grad_output=grad, + indices=sort_indices, + num_weights=num_weights, + padding_idx=padding_idx, + scale_grad_by_freq=scale_grad_by_freq + ) + + return result + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from gen_data.py: grad = np.random.randn(6).reshape(2, 3), num_weights = 4 + grad_shape = [2, 3] + num_weights = 4 + + grad = torch.randn(grad_shape, dtype=torch.float32) + sort_indices = torch.randint(0, num_weights, (2,), dtype=torch.int32) + pos_idx = torch.randint(0, 2, (2,), dtype=torch.int32) + num_weights_tensor = torch.tensor([num_weights], dtype=torch.int32) + padding_idx_tensor = torch.tensor([0], dtype=torch.int32) + scale_grad_by_freq_tensor = torch.tensor([False], dtype=torch.bool) + + return [grad, sort_indices, pos_idx, num_weights_tensor, padding_idx_tensor, scale_grad_by_freq_tensor] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [4, 0, False] # num_weights=4, padding_idx=0, scale_grad_by_freq=False diff --git a/aikg/benchmark/aikgbench/llm/index/FeedsRepeat.py b/aikg/benchmark/aikgbench/llm/index/FeedsRepeat.py new file mode 100644 index 0000000000000000000000000000000000000000..990c3a478ebb26d5613dbc80aaa565a2bb8092fe --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/FeedsRepeat.py @@ -0,0 +1,64 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs FeedsRepeat operation. + """ + + def __init__(self, output_feeds_size=500): + super(Model, self).__init__() + self.output_feeds_size = output_feeds_size + + def forward(self, feeds, feeds_repeat_times): + """ + Perform FeedsRepeat operation. + + Args: + feeds: Input feeds tensor + feeds_repeat_times: Repeat times for each feed + + Returns: + Repeated and padded feeds tensor + """ + # Repeat feeds according to feeds_repeat_times + repeated_feeds = torch.repeat_interleave( + feeds, feeds_repeat_times, dim=0) + + # Calculate total repeated size + total_repeated = torch.sum(feeds_repeat_times) + + # Calculate padding needed + pad_size = self.output_feeds_size - total_repeated + + if pad_size > 0: + # Pad with zeros to reach output_feeds_size + pad_shape = list(repeated_feeds.shape) + pad_shape[0] = pad_size + padding = torch.zeros( + pad_shape, dtype=repeated_feeds.dtype, device=repeated_feeds.device) + output = torch.cat([repeated_feeds, padding], dim=0) + else: + # Truncate if output_feeds_size is smaller than total repeated + output = repeated_feeds[:self.output_feeds_size] + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+    """
+    # Use shapes from gen_data.py: feeds = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
+    feeds = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
+    feeds_repeat_times = torch.tensor([100, 200], dtype=torch.int32)
+
+    return [feeds, feeds_repeat_times]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [500]  # output_feeds_size=500
diff --git a/aikg/benchmark/aikgbench/llm/index/GatherV3.py b/aikg/benchmark/aikgbench/llm/index/GatherV3.py
new file mode 100644
index 0000000000000000000000000000000000000000..79ce808b919ec1f88eebee6dd2e0b501bfb70632
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/index/GatherV3.py
@@ -0,0 +1,53 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs GatherV3 operation.
+    """
+
+    def __init__(self, axis=0):
+        super(Model, self).__init__()
+        self.axis = axis
+
+    def forward(self, self_tensor, indices, axis_tensor=None):
+        """
+        Perform GatherV3 operation.
+
+        Args:
+            self_tensor: Input tensor
+            indices: Index tensor
+            axis_tensor: Axis tensor (optional, uses self.axis if not provided)
+
+        Returns:
+            Gathered tensor
+        """
+        # Use the provided axis_tensor or fall back to self.axis
+        axis = axis_tensor.item() if axis_tensor is not None else self.axis
+
+        # Use torch.gather to gather values along the specified axis
+        result = torch.gather(self_tensor, axis, indices)
+        return result
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    # Use shapes from gen_data.py: input_data = torch.randn(4, 2), index = torch.randint(0, 4, (2,))
+    self_shape = [4, 2]
+    indices_shape = [2]
+
+    self_tensor = torch.randn(self_shape, dtype=torch.float32)
+    indices = torch.randint(0, 4, indices_shape, dtype=torch.int64)  # changed to int64
+    axis_tensor = torch.tensor([0], dtype=torch.int64)
+
+    return [self_tensor, indices, axis_tensor]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [0]  # axis=0
diff --git a/aikg/benchmark/aikgbench/llm/index/InplaceIndexAddWithSorted.py b/aikg/benchmark/aikgbench/llm/index/InplaceIndexAddWithSorted.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3adae31589b194998eceed488144c47082a0603
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/index/InplaceIndexAddWithSorted.py
@@ -0,0 +1,61 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs InplaceIndexAddWithSorted operation.
+    """
+
+    def __init__(self, axis=0):
+        super(Model, self).__init__()
+        self.axis = axis
+
+    def forward(self, var, value, sorted_indices, pos, alpha=1.0):
+        """
+        Perform InplaceIndexAddWithSorted operation.
+
+        Args:
+            var: Base tensor to add into
+            value: Source tensor with values to add
+            sorted_indices: Sorted index tensor
+            pos: Position tensor
+            alpha: Scaling factor
+
+        Returns:
+            Tensor with values added at specified indices
+        """
+        # Create a copy of var to avoid modifying the original
+        output = var.clone()
+
+        # Use index_add_ to perform in-place index add operation
+        # This adds values at the specified indices along the given axis
+        output.index_add_(self.axis, sorted_indices, value, alpha=alpha)
+
+        return output
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+ """ + # Use shapes from gen_data.py: self = np.random.randn(4, 2), index = torch.randint(0, 4, (4,)) + var_shape = [4, 2] + value_shape = [4, 2] + sorted_indices_shape = [4] + pos_shape = [4] + + var = torch.randn(var_shape, dtype=torch.float32) + value = torch.randn(value_shape, dtype=torch.float32) + sorted_indices = torch.randint( + 0, 4, sorted_indices_shape, dtype=torch.int32) + pos = torch.randint(0, 4, pos_shape, dtype=torch.int32) + + return [var, value, sorted_indices, pos] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [0] # axis=0 diff --git a/aikg/benchmark/aikgbench/llm/index/MaskedSelectV3.py b/aikg/benchmark/aikgbench/llm/index/MaskedSelectV3.py new file mode 100644 index 0000000000000000000000000000000000000000..d4aa88e10b5cc269ed641966eff1d2d1722edb94 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/MaskedSelectV3.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs MaskedSelectV3 operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, mask): + """ + Perform MaskedSelectV3 operation. + + Args: + x: Input tensor + mask: Boolean mask tensor + + Returns: + Selected elements as 1D tensor + """ + # Use torch.masked_select to select elements based on boolean mask + result = torch.masked_select(x, mask) + return result + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from gen_data.py: input_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]]) + x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int32) + mask = torch.tensor( + [[True, False, True], [False, True, False]], dtype=torch.bool) + + return [x, mask] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/index/ScatterAddWithSorted.py b/aikg/benchmark/aikgbench/llm/index/ScatterAddWithSorted.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8a1666e3e7358e361375ea3b7122a893530f43 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/ScatterAddWithSorted.py @@ -0,0 +1,61 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs ScatterAddWithSorted operation. + """ + + def __init__(self, dim=0): + super(Model, self).__init__() + self.dim = dim + + def forward(self, var, value, sorted_index, pos, reduction="add"): + """ + Perform ScatterAddWithSorted operation. + + Args: + var: Base tensor to scatter into + value: Source tensor with values to scatter + sorted_index: Sorted index tensor + pos: Position tensor + reduction: Reduction method ("add" for scatter_add) + + Returns: + Scattered tensor with accumulated values + """ + # Create a copy of var to avoid modifying the original + output = var.clone() + + # Use scatter_add_ to perform in-place scatter add operation + # This accumulates values at the same indices + output.scatter_add_(self.dim, sorted_index, value) + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+    """
+    # Use shapes from gen_data.py: self = torch.randn(4, 4), index = torch.randint(0, 3, (3, 4))
+    var_shape = [4, 4]
+    value_shape = [4, 4]
+    sorted_index_shape = [3, 4]
+    pos_shape = [3, 4]
+
+    var = torch.randn(var_shape, dtype=torch.float32)
+    value = torch.randn(value_shape, dtype=torch.float32)
+    sorted_index = torch.randint(
+        0, 3, sorted_index_shape, dtype=torch.int64)  # use int64 for the index dtype
+    pos = torch.randint(0, 4, pos_shape, dtype=torch.int32)
+
+    return [var, value, sorted_index, pos]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [0]  # dim=0
diff --git a/aikg/benchmark/aikgbench/llm/index/ScatterElementsV2.py b/aikg/benchmark/aikgbench/llm/index/ScatterElementsV2.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c35f7c2e90cdaccc0ca2c3020d55edf62b132a0
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/index/ScatterElementsV2.py
@@ -0,0 +1,56 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs ScatterElementsV2 operation.
+    """
+
+    def __init__(self, dim=0):
+        super(Model, self).__init__()
+        self.dim = dim
+
+    def forward(self, self_tensor, index, src):
+        """
+        Perform ScatterElementsV2 operation.
+
+        Args:
+            self_tensor: Base tensor to scatter into
+            index: Index tensor specifying positions
+            src: Source tensor with values to scatter
+
+        Returns:
+            Scattered tensor
+        """
+        # Create a copy of self_tensor to avoid modifying the original
+        output = self_tensor.clone()
+
+        # Use scatter_ to perform in-place scatter operation along specified dimension
+        output.scatter_(self.dim, index, src)
+
+        return output
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    # Use shapes from gen_data.py: self_shape = [3, 4], index_shape = [2, 3], src_shape = [2, 3]
+    self_shape = [3, 4]
+    index_shape = [2, 3]
+    src_shape = [2, 3]
+
+    self_tensor = torch.randn(self_shape, dtype=torch.float32)
+    # indices within valid range
+    index = torch.randint(0, 3, index_shape, dtype=torch.int64)
+    src = torch.randn(src_shape, dtype=torch.float32)
+
+    return [self_tensor, index, src]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [0]  # dim=0
diff --git a/aikg/benchmark/aikgbench/llm/index/ScatterList.py b/aikg/benchmark/aikgbench/llm/index/ScatterList.py
new file mode 100644
index 0000000000000000000000000000000000000000..e15d79e3eea028048235859ac4945cff59f13dbe
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/index/ScatterList.py
@@ -0,0 +1,56 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs ScatterList operation.
+    """
+
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, self_tensor, index, src):
+        """
+        Perform ScatterList operation.
+
+        Args:
+            self_tensor: Base tensor to scatter into
+            index: Index tensor specifying positions
+            src: Source tensor with values to scatter
+
+        Returns:
+            Scattered tensor
+        """
+        # Create a copy of self_tensor to avoid modifying the original
+        output = self_tensor.clone()
+
+        # Simplified stand-in for ScatterList with the gen_data shapes: drop the
+        # leading list dimension of src and copy the indexed rows into the output
+        rows = index.flatten().long()
+        output[rows] = src.reshape(self_tensor.shape)[rows]
+
+        return output
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
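+    Mirrors gen_data.py: the var tensor is (5, 3, 4) float32, the indices are
+    (1, 2) int64 and the updates tensor is (1, 5, 3, 4) float32.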
+ """ + # Use shapes from gen_data.py: varRefShape = [5, 3, 4], indiceShape = [1, 2], updatesShape = [1, 5, 3, 4] + varRefShape = [5, 3, 4] + indiceShape = [1, 2] + updatesShape = [1, 5, 3, 4] + + self_tensor = torch.randn(varRefShape, dtype=torch.float32) + # indices within valid range + index = torch.randint(0, 3, indiceShape, dtype=torch.int64) + src = torch.randn(updatesShape, dtype=torch.float32) + + return [self_tensor, index, src] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/index/TopKV3.py b/aikg/benchmark/aikgbench/llm/index/TopKV3.py new file mode 100644 index 0000000000000000000000000000000000000000..d82e50f9d7c1bcb8d716a04a1f907c6c58de727e --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/index/TopKV3.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs TopKV3 operation. + """ + + def __init__(self, k=2, dim=1, largest=True, sorted=True): + super(Model, self).__init__() + self.k = k + self.dim = dim + self.largest = largest + self.sorted = sorted + + def forward(self, self_tensor): + """ + Perform TopKV3 operation. + + Args: + self_tensor: Input tensor + + Returns: + Tuple of (values, indices) tensors + """ + values, indices = torch.topk(self_tensor, k=self.k, dim=self.dim, + largest=self.largest, sorted=self.sorted) + return values, indices + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from gen_data.py: input_shape = [2, 16] + input_shape = [2, 16] + self_tensor = torch.randn(input_shape, dtype=torch.float32) + return [self_tensor] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [2, 1, True, True] # k=2, dim=1, largest=True, sorted=True diff --git a/aikg/benchmark/aikgbench/llm/matmul/BatchMatMulV3.py b/aikg/benchmark/aikgbench/llm/matmul/BatchMatMulV3.py new file mode 100644 index 0000000000000000000000000000000000000000..566495287ac7b71f7d91872d68011f41700d8006 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/BatchMatMulV3.py @@ -0,0 +1,74 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs BatchMatMulV3 operation. + """ + + def __init__(self, adj_x1=False, adj_x2=False, offset_x=0, enable_hf32=False): + super(Model, self).__init__() + self.adj_x1 = adj_x1 + self.adj_x2 = adj_x2 + self.offset_x = offset_x + self.enable_hf32 = enable_hf32 + + def forward(self, x1, x2, bias=None, offset_w=None): + """ + Perform BatchMatMulV3 operation. + + Args: + x1: First input tensor + x2: Second input tensor + bias: Optional bias tensor + offset_w: Optional offset tensor + + Returns: + Output tensor after batch matrix multiplication + """ + # Apply adjoint operations if needed + if self.adj_x1: + x1 = x1.transpose(-2, -1) + if self.adj_x2: + x2 = x2.transpose(-2, -1) + + # Perform batch matrix multiplication + output = torch.matmul(x1, x2) + + # Add bias if provided + if bias is not None: + output = output + bias + + # Add offset if provided + if offset_w is not None: + output = output + offset_w + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. 
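+    Uses the README shapes: x1 is (2, 16, 32) and x2 is (2, 32, 16), with an
+    optional (2, 1, 16) bias and a (2, 16, 16) offset, all float32.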
+ """ + # Use shapes from README: x1(2, 16, 32), x2(2, 32, 16) + batch_size, m, k, n = 2, 16, 32, 16 + + # Generate input tensors + x1 = torch.randn(batch_size, m, k, dtype=torch.float32) + x2 = torch.randn(batch_size, k, n, dtype=torch.float32) + + # Generate optional bias + bias = torch.randn(batch_size, 1, n, dtype=torch.float32) + + # Generate optional offset + offset_w = torch.randn(batch_size, m, n, dtype=torch.float32) + + return [x1, x2, bias, offset_w] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [False, False, 0, False] # adj_x1=False, adj_x2=False, offset_x=0, enable_hf32=False diff --git a/aikg/benchmark/aikgbench/llm/matmul/ComplexMatMul.py b/aikg/benchmark/aikgbench/llm/matmul/ComplexMatMul.py new file mode 100644 index 0000000000000000000000000000000000000000..eab97519bccdfa8f97238c609545f0498ee92f13 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/ComplexMatMul.py @@ -0,0 +1,62 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs ComplexMatMul operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, y, bias): + """ + Perform ComplexMatMul operation. + + Args: + x: First complex input tensor + y: Second complex input tensor + bias: Complex bias tensor + + Returns: + Complex output tensor after matrix multiplication + """ + # Perform complex matrix multiplication + output = torch.matmul(x, y) + + # Add bias + output = output + bias + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use complex64 data type as specified in README + batch_size, m, k, n = 2, 16, 32, 16 + + # Generate complex input tensors + x_real = torch.randn(batch_size, m, k, dtype=torch.float32) + x_imag = torch.randn(batch_size, m, k, dtype=torch.float32) + x = torch.complex(x_real, x_imag) + + y_real = torch.randn(batch_size, k, n, dtype=torch.float32) + y_imag = torch.randn(batch_size, k, n, dtype=torch.float32) + y = torch.complex(y_real, y_imag) + + # Generate complex bias + bias_real = torch.randn(batch_size, m, n, dtype=torch.float32) + bias_imag = torch.randn(batch_size, m, n, dtype=torch.float32) + bias = torch.complex(bias_real, bias_imag) + + return [x, y, bias] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/matmul/GemmV2.py b/aikg/benchmark/aikgbench/llm/matmul/GemmV2.py new file mode 100644 index 0000000000000000000000000000000000000000..4fba348216201c6cb99c2a0931fbdfb7465ae61b --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/GemmV2.py @@ -0,0 +1,64 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs GemmV2 operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, A, B, alpha, beta, C): + """ + Perform GemmV2 operation. + + Args: + A: First input tensor + B: Second input tensor + alpha: Alpha scaling factor + beta: Beta scaling factor + C: Third input tensor + + Returns: + Output tensor: out = α(A @ B) + βC + """ + # Perform matrix multiplication + matmul_result = torch.matmul(A, B) + + # Apply alpha scaling + scaled_result = alpha * matmul_result + + # Apply beta scaling to C and add + output = scaled_result + beta * C + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. 
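+    Uses the README shapes: A and B are (2, 2) float16 matrices, alpha and beta
+    are 1-element float16 tensors, and C is a (2, 2) float32 matrix.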
+ """ + # Use shapes from README: A(2, 2), B(2, 2), C(2, 2) + m, n, k = 2, 2, 2 + + # Generate input tensors (using float16 as specified) + A = torch.randn(m, k, dtype=torch.float16) + B = torch.randn(k, n, dtype=torch.float16) + + # Generate scaling factors + alpha = torch.randn(1, dtype=torch.float16) + beta = torch.randn(1, dtype=torch.float16) + + # Generate C tensor (using float32 as specified) + C = torch.randn(m, n, dtype=torch.float32) + + return [A, B, alpha, beta, C] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/matmul/MatMulV3.py b/aikg/benchmark/aikgbench/llm/matmul/MatMulV3.py new file mode 100644 index 0000000000000000000000000000000000000000..712ef63e284e30d775d45e83f67d1cb188ecb510 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/MatMulV3.py @@ -0,0 +1,74 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs MatMulV3 operation. + """ + + def __init__(self, transpose_x1=False, transpose_x2=False, offset_x=0, enable_hf32=False): + super(Model, self).__init__() + self.transpose_x1 = transpose_x1 + self.transpose_x2 = transpose_x2 + self.offset_x = offset_x + self.enable_hf32 = enable_hf32 + + def forward(self, x1, x2, bias=None, offset_w=None): + """ + Perform MatMulV3 operation. + + Args: + x1: First input tensor + x2: Second input tensor + bias: Optional bias tensor + offset_w: Optional offset tensor + + Returns: + Output tensor after matrix multiplication + """ + # Apply transpose operations if needed + if self.transpose_x1: + x1 = x1.transpose(-2, -1) + if self.transpose_x2: + x2 = x2.transpose(-2, -1) + + # Perform matrix multiplication + output = torch.matmul(x1, x2) + + # Add bias if provided + if bias is not None: + output = output + bias + + # Add offset if provided + if offset_w is not None: + output = output + offset_w + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from README: x1(2, 16, 32), x2(2, 32, 16) + batch_size, m, k, n = 2, 16, 32, 16 + + # Generate input tensors + x1 = torch.randn(batch_size, m, k, dtype=torch.float32) + x2 = torch.randn(batch_size, k, n, dtype=torch.float32) + + # Generate optional bias + bias = torch.randn(batch_size, 1, n, dtype=torch.float32) + + # Generate optional offset + offset_w = torch.randn(batch_size, m, n, dtype=torch.float32) + + return [x1, x2, bias, offset_w] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [False, False, 0, False] # transpose_x1=False, transpose_x2=False, offset_x=0, enable_hf32=False diff --git a/aikg/benchmark/aikgbench/llm/matmul/Mmad.py b/aikg/benchmark/aikgbench/llm/matmul/Mmad.py new file mode 100644 index 0000000000000000000000000000000000000000..9303af50cf0bf623cc844deb998b83e5c7280a12 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/Mmad.py @@ -0,0 +1,55 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs Mmad operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, A, B, Bias): + """ + Perform Mmad operation. 
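+        Computes C = A @ B + Bias, with Bias broadcast across the rows of the
+        product.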
+ + Args: + A: Left matrix tensor [M, K] + B: Right matrix tensor [K, N] + Bias: Bias tensor [N] + + Returns: + Output tensor C = A * B + Bias + """ + # Perform matrix multiplication + output = torch.matmul(A, B) + + # Add bias to each row + output = output + Bias + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from README: A(32, 32), B(32, 32), Bias(1, 32) + M, K, N = 32, 32, 32 + + # Generate input tensors (using float16 as specified) + A = torch.randn(M, K, dtype=torch.float16) + B = torch.randn(K, N, dtype=torch.float16) + + # Generate bias (using float as specified) + Bias = torch.randn(1, N, dtype=torch.float32) + + return [A, B, Bias] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/matmul/QuantBatchMatmulV3.py b/aikg/benchmark/aikgbench/llm/matmul/QuantBatchMatmulV3.py new file mode 100644 index 0000000000000000000000000000000000000000..a149d92ffd80ce1adda281bb726ba233d2625d10 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/QuantBatchMatmulV3.py @@ -0,0 +1,69 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs QuantBatchMatmulV3 operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x1, x2, scale, offset, bias=None): + """ + Perform QuantBatchMatmulV3 operation. + + Args: + x1: First quantized input tensor + x2: Second quantized input tensor + scale: Scale factor for quantization + offset: Offset factor for quantization + bias: Optional bias tensor + + Returns: + Quantized output tensor + """ + # Convert to float for matrix multiplication + x1_float = x1.float() + x2_float = x2.float() + + # Perform matrix multiplication + output = torch.matmul(x1_float, x2_float) + + # Add bias if provided + if bias is not None: + output = output + bias + + # Apply scale and offset + output = output * scale + offset + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from README: x1(16, 32), x2(32, 16) + m, k, n = 16, 32, 16 + + # Generate quantized input tensors (using int8 as specified) + x1 = torch.randint(-128, 127, (m, k), dtype=torch.int8) + x2 = torch.randint(-128, 127, (k, n), dtype=torch.int8) + + # Generate scale and offset + scale = torch.randn(1, dtype=torch.float32) + offset = torch.randn(1, dtype=torch.float32) + + # Generate optional bias + bias = torch.randn(n, dtype=torch.float32) + + return [x1, x2, scale, offset, bias] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/matmul/WeightQuantBatchMatmulV2.py b/aikg/benchmark/aikgbench/llm/matmul/WeightQuantBatchMatmulV2.py new file mode 100644 index 0000000000000000000000000000000000000000..70b668b083d164f7eda6a57222021d0ebce253e9 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/matmul/WeightQuantBatchMatmulV2.py @@ -0,0 +1,77 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs WeightQuantBatchMatmulV2 operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, weight, antiquantScale, antiquantOffsetOptional=None, quantScaleOptional=None, quantOffsetOptional=None): + """ + Perform WeightQuantBatchMatmulV2 operation. 
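+        Dequantizes the weight as (weight + antiquant_offset) * antiquant_scale,
+        multiplies x by the dequantized weight, and optionally applies the output
+        quantization scale and offset.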
+ + Args: + x: Input tensor + weight: Quantized weight tensor + antiquantScale: Scale for weight dequantization + antiquantOffsetOptional: Optional offset for weight dequantization + quantScaleOptional: Optional scale for output quantization + quantOffsetOptional: Optional offset for output quantization + + Returns: + Output tensor after weight dequantization and matrix multiplication + """ + # Dequantize weight: ANTIQUANT(weight) = (weight + antiquantOffset) * antiquantScale + dequantized_weight = weight.float() + if antiquantOffsetOptional is not None: + dequantized_weight = dequantized_weight + antiquantOffsetOptional + dequantized_weight = dequantized_weight * antiquantScale + + # Convert x to float32 for matrix multiplication if needed + x_float = x.float() if x.dtype != torch.float32 else x + + # Perform matrix multiplication + output = torch.matmul(x_float, dequantized_weight) + + # Apply output quantization if provided + if quantScaleOptional is not None: + output = output * quantScaleOptional + if quantOffsetOptional is not None: + output = output + quantOffsetOptional + + return output + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use shapes from README: x(16, 32), weight(32, 16) + m, k, n = 16, 32, 16 + + # Generate input tensors (using float16 as specified) + x = torch.randn(m, k, dtype=torch.float16) + + # Generate quantized weight (using int8 as specified) + weight = torch.randint(-128, 127, (k, n), dtype=torch.int8) + + # Generate dequantization parameters + antiquantScale = torch.randn(1, dtype=torch.float16) + antiquantOffsetOptional = torch.randn(1, dtype=torch.float16) + + # Generate optional quantization parameters + quantScaleOptional = torch.randn(1, dtype=torch.float32) + quantOffsetOptional = torch.randn(1, dtype=torch.float32) + + return [x, weight, antiquantScale, antiquantOffsetOptional, quantScaleOptional, quantOffsetOptional] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/norm/AddLayerNorm.py b/aikg/benchmark/aikgbench/llm/norm/AddLayerNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..b7ba6f6a748c8b943d41273553118c8b22d65474 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddLayerNorm.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddLayerNorm operation. + """ + + def __init__(self, epsilon=1e-6, additional_output=False): + super(Model, self).__init__() + self.epsilon = epsilon + self.additional_output = additional_output + + def forward(self, x1, x2, gamma, beta, bias=None): + """ + Perform AddLayerNorm operation. 
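+        Computes x = x1 + x2 (+ bias) and then applies LayerNorm over the last
+        dimension: y = (x - mean) * rstd * gamma + beta.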
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + bias: Optional bias tensor + + Returns: + Tuple of (output, mean, rstd, x) if additional_output=True + Otherwise just the output tensor + """ + # Add the two input tensors + if bias is not None: + x = x1 + x2 + bias + else: + x = x1 + x2 + + # Get input shape and reshape for layer norm + input_shape = x.shape + row_size = x.shape[-1] + row_count = 1 + for i in range(0, len(input_shape) - 1): + row_count *= input_shape[i] + + x_shape = (row_count, row_size) + x_mean_shape = (row_count, 1) + + # Reshape for layer norm computation + x_reshaped = x.reshape(x_shape) + + # Compute mean and variance + x_mean = torch.mean(x_reshaped, dim=1, keepdim=True) + x_var = torch.var(x_reshaped, dim=1, keepdim=True, + unbiased=False) + self.epsilon + x_rstd = 1.0 / torch.sqrt(x_var) + + # Broadcast tensors to match x_shape + x_mean_broadcast = x_mean.expand(x_shape) + x_rstd_broadcast = x_rstd.expand(x_shape) + gamma_broadcast = gamma.expand(x_shape) + beta_broadcast = beta.expand(x_shape) + + # Apply layer normalization + y = torch.multiply(torch.multiply( + x_reshaped - x_mean_broadcast, x_rstd_broadcast), gamma_broadcast) + beta_broadcast + + # Reshape back to original shape + y = y.reshape(input_shape) + x_mean = x_mean.reshape(input_shape[:-1] + (1,)) + x_rstd = x_rstd.reshape(input_shape[:-1] + (1,)) + + if self.additional_output: + return y, x_mean, x_rstd, x + else: + return y + + +def get_inputs(): + """ + Generate random input tensors for testing. + """ + # Use the same shapes as in gen_data.py + batch_size, seq_len, hidden_size = 1, 2, 8 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma and beta parameters + gamma = torch.randn(hidden_size, dtype=torch.float32) + beta = torch.randn(hidden_size, dtype=torch.float32) + + # Generate optional bias + bias = torch.randn(hidden_size, dtype=torch.float32) + + return [x1, x2, gamma, beta, bias] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + """ + return [1e-6, True] # epsilon=1e-6, additional_output=True diff --git a/aikg/benchmark/aikgbench/llm/norm/AddLayerNormGrad.py b/aikg/benchmark/aikgbench/llm/norm/AddLayerNormGrad.py new file mode 100644 index 0000000000000000000000000000000000000000..01ce7b919d8ee571cb06a736b1f394f892f17548 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddLayerNormGrad.py @@ -0,0 +1,86 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddLayerNormGrad operation. + """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, dy, x1, x2, rstd, mean, gamma, dsum): + """ + Perform AddLayerNormGrad operation (backward pass). 
+ + Args: + dy: Gradient of output + x1: First input tensor + x2: Second input tensor + rstd: Reciprocal standard deviation from forward pass + mean: Mean from forward pass + gamma: Scale parameter tensor + dsum: Sum of gradients + + Returns: + Tuple of (dx, dgamma, dbeta) where: + - dx is the gradient with respect to input + - dgamma is the gradient with respect to gamma + - dbeta is the gradient with respect to beta + """ + # Add the two input tensors (same as forward pass) + x = x1 + x2 + + # Compute gradients for layer norm backward + # This is a simplified implementation of the backward pass + N = x.shape[0] + C = x.shape[-1] + + # Compute gradients for gamma and beta + dgamma = torch.sum(dy * (x - mean) * rstd, dim=0) + dbeta = torch.sum(dy, dim=0) + + # Compute gradient with respect to input + # Simplified gradient computation + dx = dy * gamma * rstd + + return dx, dgamma, dbeta + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other layer norm operations + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gradient of output + dy = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate statistics from forward pass + x = x1 + x2 + mean = torch.mean(x, dim=-1, keepdim=True) + var = torch.var(x, dim=-1, keepdim=True, unbiased=False) + 1e-6 + rstd = 1.0 / torch.sqrt(var) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float32) + + # Generate dsum (simplified) + dsum = torch.randn_like(dy) + + return [dy, x1, x2, rstd, mean, gamma, dsum] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For gradient operations, no specific initialization parameters are needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/norm/AddLayerNormQuant.py b/aikg/benchmark/aikgbench/llm/norm/AddLayerNormQuant.py new file mode 100644 index 0000000000000000000000000000000000000000..069fcdba71c0a7a42f201bb5664fefd46764a25a --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddLayerNormQuant.py @@ -0,0 +1,125 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddLayerNormQuant operation. + Based on + """ + + def __init__(self, epsilon=1e-6, additional_output=False, quant_mode="symmetric"): + super(Model, self).__init__() + self.epsilon = epsilon + self.additional_output = additional_output + self.quant_mode = quant_mode + + def forward(self, x1, x2, gamma, beta, bias, scales1, scales2, zero_points1, zero_points2): + """ + Perform AddLayerNormQuant operation. 
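+        Applies the AddLayerNorm computation and then quantizes the result twice
+        to int8: y_i = round(y / scales_i + zero_points_i), clamped to [-128, 127].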
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + bias: Bias tensor + scales1: Quantization scales for first output + scales2: Quantization scales for second output + zero_points1: Quantization zero points for first output + zero_points2: Quantization zero points for second output + + Returns: + Tuple of (y1, y2, x, out_scales1, out_scales2) where: + - y1, y2 are quantized outputs + - x is the sum of input tensors + - out_scales1, out_scales2 are output scales + """ + # Add the two input tensors + if bias is not None: + x = x1 + x2 + bias + else: + x = x1 + x2 + + # Get input shape and reshape for layer norm + input_shape = x.shape + row_size = x.shape[-1] + row_count = 1 + for i in range(0, len(input_shape) - 1): + row_count *= input_shape[i] + + x_shape = (row_count, row_size) + x_mean_shape = (row_count, 1) + + # Reshape for layer norm computation + x_reshaped = x.reshape(x_shape) + + # Compute mean and variance + x_mean = torch.mean(x_reshaped, dim=1, keepdim=True) + x_var = torch.var(x_reshaped, dim=1, keepdim=True, + unbiased=False) + self.epsilon + x_rstd = 1.0 / torch.sqrt(x_var) + + # Broadcast tensors to match x_shape + x_mean_broadcast = x_mean.expand(x_shape) + x_rstd_broadcast = x_rstd.expand(x_shape) + gamma_broadcast = gamma.expand(x_shape) + beta_broadcast = beta.expand(x_shape) + + # Apply layer normalization + y = torch.multiply(torch.multiply( + x_reshaped - x_mean_broadcast, x_rstd_broadcast), gamma_broadcast) + beta_broadcast + + # Reshape back to original shape + y = y.reshape(input_shape) + + # Quantize outputs + y1_quantized = torch.round( + y / scales1 + zero_points1).clamp(-128, 127).to(torch.int8) + y2_quantized = torch.round( + y / scales2 + zero_points2).clamp(-128, 127).to(torch.int8) + + # Compute output scales + out_scales1 = scales1 + out_scales2 = scales2 + + if self.additional_output: + return y1_quantized, y2_quantized, x, out_scales1, out_scales2 + else: + return y1_quantized, y2_quantized + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other layer norm operations + batch_size, seq_len, hidden_size = 1, 2, 8 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma and beta parameters + gamma = torch.randn(hidden_size, dtype=torch.float32) + beta = torch.randn(hidden_size, dtype=torch.float32) + + # Generate bias + bias = torch.randn(hidden_size, dtype=torch.float32) + + # Generate quantization parameters + scales1 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + scales2 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + zero_points1 = torch.zeros(1, dtype=torch.float32) + zero_points2 = torch.zeros(1, dtype=torch.float32) + + return [x1, x2, gamma, beta, bias, scales1, scales2, zero_points1, zero_points2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. 
+ Based on parameters + """ + return [1e-6, True, "symmetric"] # epsilon=1e-6, additional_output=True, quant_mode="symmetric" diff --git a/aikg/benchmark/aikgbench/llm/norm/AddRmsNorm.py b/aikg/benchmark/aikgbench/llm/norm/AddRmsNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..c8269a7338a684c1574eb9cbbdfd9959dcbe8e12 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddRmsNorm.py @@ -0,0 +1,71 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddRmsNorm operation. + Based on + """ + + def __init__(self, epsilon=1e-6): + super(Model, self).__init__() + self.epsilon = epsilon + + def forward(self, x1, x2, gamma): + """ + Perform AddRmsNorm operation. + + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + + Returns: + Tuple of (output, rstd, x) where: + - output is the normalized tensor + - rstd is the reciprocal standard deviation + - x is the sum of input tensors + """ + # Add the two input tensors + x = x1 + x2 + + # Compute RMS (Root Mean Square) normalization + # Unlike LayerNorm, RmsNorm doesn't subtract the mean + x_squared = x.pow(2) + x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + self.epsilon) + x_rstd = 1.0 / x_rms + + # Apply normalization + x_normalized = x * x_rstd + + # Apply scale parameter + output = x_normalized * gamma + + return output, x_rstd, x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use the same shapes as in gen_data.py + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float32) + + return [x1, x2, gamma] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [1e-6] # epsilon=1e-6 diff --git a/aikg/benchmark/aikgbench/llm/norm/AddRmsNormCast.py b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormCast.py new file mode 100644 index 0000000000000000000000000000000000000000..454726e0f8c86ebc9580389fc39c397616e4d9ae --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormCast.py @@ -0,0 +1,77 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddRmsNormCast operation. + Based on + """ + + def __init__(self, epsilon=1e-6): + super(Model, self).__init__() + self.epsilon = epsilon + + def forward(self, x1, x2, gamma): + """ + Perform AddRmsNormCast operation. + + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + + Returns: + Tuple of (y1, y2, rstd, x) where: + - y1 is the cast output (float) + - y2 is the normalized output (original dtype) + - rstd is the reciprocal standard deviation + - x is the sum of input tensors + """ + # Add the two input tensors + x = x1 + x2 + + # Compute RMS (Root Mean Square) normalization + x_squared = x.pow(2) + x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + self.epsilon) + x_rstd = 1.0 / x_rms + + # Apply normalization + x_normalized = x * x_rstd + + # Apply scale parameter + output = x_normalized * gamma + + # Cast to float for y1 + y1 = output.to(torch.float32) + + # Keep original dtype for y2 + y2 = output + + return y1, y2, x_rstd, x + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Based on + """ + # Use similar shapes as other rms norm operations + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors (using float16 as specified in README) + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float16) + + return [x1, x2, gamma] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [1e-6] # epsilon=1e-6 diff --git a/aikg/benchmark/aikgbench/llm/norm/AddRmsNormDynamicQuant.py b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormDynamicQuant.py new file mode 100644 index 0000000000000000000000000000000000000000..c9767215ccea71b55a31e9d60f9e9909ee391b8a --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormDynamicQuant.py @@ -0,0 +1,86 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddRmsNormDynamicQuant operation. + Based on + """ + + def __init__(self, epsilon=1e-6): + super(Model, self).__init__() + self.epsilon = epsilon + + def forward(self, x1, x2, gamma, smooth_scale1, smooth_scale2): + """ + Perform AddRmsNormDynamicQuant operation. + + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + smooth_scale1: Smooth scale for first output + smooth_scale2: Smooth scale for second output + + Returns: + Tuple of (y1, y2, x, scale1, scale2) where: + - y1, y2 are dynamically quantized outputs + - x is the sum of input tensors + - scale1, scale2 are dynamic quantization scales + """ + # Add the two input tensors + x = x1 + x2 + + # Compute RMS (Root Mean Square) normalization + x_squared = x.pow(2) + x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + self.epsilon) + x_rstd = 1.0 / x_rms + + # Apply normalization + x_normalized = x * x_rstd + + # Apply scale parameter + output = x_normalized * gamma + + # Dynamic quantization for first output + max_val1 = torch.max(torch.abs(output)) + scale1 = max_val1 / 127.0 + y1 = torch.round(output / scale1).clamp(-128, 127).to(torch.int8) + + # Dynamic quantization for second output + max_val2 = torch.max(torch.abs(output)) + scale2 = max_val2 / 127.0 + y2 = torch.round(output / scale2).clamp(-128, 127).to(torch.int8) + + return y1, y2, x, scale1, scale2 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other rms norm operations + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors (using float16 as specified in README) + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float16) + + # Generate smooth scales + smooth_scale1 = torch.rand(1, dtype=torch.float16) * 0.1 + 0.01 + smooth_scale2 = torch.rand(1, dtype=torch.float16) * 0.1 + 0.01 + + return [x1, x2, gamma, smooth_scale1, smooth_scale2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. 
+ Based on parameters + """ + return [1e-6] # epsilon=1e-6 diff --git a/aikg/benchmark/aikgbench/llm/norm/AddRmsNormQuant.py b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormQuant.py new file mode 100644 index 0000000000000000000000000000000000000000..15f6d4528178dd16ff011ad6d54ca3c6f9439931 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/AddRmsNormQuant.py @@ -0,0 +1,94 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs AddRmsNormQuant operation. + Based on + """ + + def __init__(self, epsilon=1e-6, axis=-1, div_mode=False): + super(Model, self).__init__() + self.epsilon = epsilon + self.axis = axis + self.div_mode = div_mode + + def forward(self, x1, x2, gamma, scales1, scales2, zero_points1, zero_points2): + """ + Perform AddRmsNormQuant operation. + + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + scales1: Quantization scales for first output + scales2: Quantization scales for second output + zero_points1: Quantization zero points for first output + zero_points2: Quantization zero points for second output + + Returns: + Tuple of (y1, y2, x) where: + - y1, y2 are quantized outputs + - x is the sum of input tensors + """ + # Add the two input tensors + x = x1 + x2 + + # Compute RMS (Root Mean Square) normalization + x_squared = x.pow(2) + x_rms = torch.sqrt(x_squared.mean( + dim=self.axis, keepdim=True) + self.epsilon) + x_rstd = 1.0 / x_rms + + # Apply normalization + x_normalized = x * x_rstd + + # Apply scale parameter + output = x_normalized * gamma + + # Quantize outputs + if self.div_mode: + y1_quantized = torch.round( + output / scales1 + zero_points1).clamp(-128, 127).to(torch.int8) + y2_quantized = torch.round( + output / scales2 + zero_points2).clamp(-128, 127).to(torch.int8) + else: + y1_quantized = torch.round( + output * scales1 + zero_points1).clamp(-128, 127).to(torch.int8) + y2_quantized = torch.round( + output * scales2 + zero_points2).clamp(-128, 127).to(torch.int8) + + return y1_quantized, y2_quantized, x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other rms norm operations + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors (using float16 as specified in README) + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float16) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float16) + + # Generate quantization parameters + scales1 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + scales2 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + zero_points1 = torch.zeros(1, dtype=torch.int32) + zero_points2 = torch.zeros(1, dtype=torch.int32) + + return [x1, x2, gamma, scales1, scales2, zero_points1, zero_points2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [1e-6, -1, False] # epsilon=1e-6, axis=-1, div_mode=False diff --git a/aikg/benchmark/aikgbench/llm/norm/BatchNormV3.py b/aikg/benchmark/aikgbench/llm/norm/BatchNormV3.py new file mode 100644 index 0000000000000000000000000000000000000000..0b65d91bfb28445be816bdef1e15726381d0f9e6 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/BatchNormV3.py @@ -0,0 +1,73 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs BatchNormV3 operation. 
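+    Wraps torch.ops.aten.native_batch_norm and returns the normalized output
+    together with the running statistics and the saved mean/rstd.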
+ Based on + """ + + def __init__(self, momentum=0.1, epsilon=1e-5, is_training=True): + super(Model, self).__init__() + self.momentum = momentum + self.epsilon = epsilon + self.is_training = is_training + + def forward(self, x, weight, bias, running_mean, running_var): + """ + Perform BatchNormV3 operation. + + Args: + x: Input tensor + weight: Weight parameter tensor + bias: Bias parameter tensor + running_mean: Running mean tensor + running_var: Running variance tensor + + Returns: + Tuple of (output, running_mean, running_var, save_mean, save_rstd) + """ + # Use PyTorch's native batch norm + output, save_mean, save_rstd = torch.ops.aten.native_batch_norm( + input=x, + weight=weight, + bias=bias, + running_mean=running_mean, + running_var=running_var, + training=self.is_training, + momentum=self.momentum, + eps=self.epsilon + ) + + return output, running_mean, running_var, save_mean, save_rstd + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use the same shapes as in gen_data.py + batch_size, channels, height, width = 1, 2, 1, 4 + + # Generate input tensor + x = torch.randn(batch_size, channels, height, width, dtype=torch.float32) + + # Generate weight and bias parameters + weight = torch.ones(channels, dtype=torch.float32) + bias = torch.zeros(channels, dtype=torch.float32) + + # Generate running statistics + running_mean = torch.zeros(channels, dtype=torch.float32) + running_var = torch.ones(channels, dtype=torch.float32) + + return [x, weight, bias, running_mean, running_var] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [0.1, 1e-5, True] # momentum=0.1, epsilon=1e-5, is_training=True diff --git a/aikg/benchmark/aikgbench/llm/norm/DeepNorm.py b/aikg/benchmark/aikgbench/llm/norm/DeepNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..d1e9c905c43f92734f4174ad1c1566121634b692 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/DeepNorm.py @@ -0,0 +1,75 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs DeepNorm operation. + Based on + """ + + def __init__(self, alpha=0.3, epsilon=1e-6): + super(Model, self).__init__() + self.alpha = alpha + self.epsilon = epsilon + + def forward(self, x, gx, beta, gamma): + """ + Perform DeepNorm operation. + + Args: + x: Input tensor + gx: Gate tensor + beta: Shift parameter tensor + gamma: Scale parameter tensor + + Returns: + Tuple of (mean, rstd, y) where: + - mean is the mean of the normalized tensor + - rstd is the reciprocal standard deviation + - y is the normalized output tensor + """ + # Apply alpha scaling and add gate + x_add = x * self.alpha + gx + + # Compute mean and variance + mean = x_add.mean(-1, keepdim=True) + diff = x_add - mean + variance = diff.pow(2).mean(-1, keepdim=True) + + # Compute reciprocal standard deviation + rstd = torch.rsqrt(variance + self.epsilon) + + # Apply normalization + output = gamma * diff * rstd + beta + + return mean, rstd, output + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Based on + """ + # Use the same shapes as in gen_data.py + batch_size, seq_len, hidden_size = 3, 1, 4 + + # Generate input tensors + x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + dtype=torch.float32).reshape(batch_size, seq_len, hidden_size) + gx = torch.tensor([2, 2, 2, 4, 4, 4, 6, 6, 6, 8, 8, 8], + dtype=torch.float32).reshape(batch_size, seq_len, hidden_size) + + # Generate beta and gamma parameters + beta = torch.tensor([0, 1, 2, 3], dtype=torch.float32) + gamma = torch.tensor([0, 1, 2, 3], dtype=torch.float32) + + return [x, gx, beta, gamma] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [0.3, 1e-6] # alpha=0.3, epsilon=1e-6 diff --git a/aikg/benchmark/aikgbench/llm/norm/DeepNormGrad.py b/aikg/benchmark/aikgbench/llm/norm/DeepNormGrad.py new file mode 100644 index 0000000000000000000000000000000000000000..f7d29bd841efbd10dbef6aaf2986e001dc487f21 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/DeepNormGrad.py @@ -0,0 +1,99 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs DeepNormGrad operation. + Based on + """ + + def __init__(self, alpha=0.3): + super(Model, self).__init__() + self.alpha = alpha + + def forward(self, dy, x, gx, gamma, mean, rstd): + """ + Perform DeepNormGrad operation (backward pass). + + Args: + dy: Gradient of output + x: Input tensor from forward pass + gx: Gate tensor from forward pass + gamma: Scale parameter tensor + mean: Mean from forward pass + rstd: Reciprocal standard deviation from forward pass + + Returns: + Tuple of (dx, dgx, dbeta, dgamma) where: + - dx is the gradient with respect to input x + - dgx is the gradient with respect to gate gx + - dbeta is the gradient with respect to beta + - dgamma is the gradient with respect to gamma + """ + # Apply alpha scaling and add gate (same as forward pass) + x_add = x * self.alpha + gx + + # Compute normalized output (same as forward pass) + diff = x_add - mean + output = gamma * diff * rstd + + # Compute gradients for DeepNorm backward + # This is a simplified implementation of the backward pass + + # Gradient with respect to gamma + dgamma = torch.sum(dy * diff * rstd, dim=0) + + # Gradient with respect to beta (simplified) + dbeta = torch.sum(dy, dim=0) + + # Gradient with respect to normalized output + dy_normalized = dy * gamma * rstd + + # Gradient with respect to x_add + dx_add = dy_normalized + + # Gradient with respect to x and gx + dx = dx_add * self.alpha + dgx = dx_add + + return dx, dgx, dbeta, dgamma + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Based on + """ + # Use similar shapes as other deep norm operations + batch_size, seq_len, hidden_size = 3, 1, 4 + + # Generate input tensors + x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + dtype=torch.float32).reshape(batch_size, seq_len, hidden_size) + gx = torch.tensor([2, 2, 2, 4, 4, 4, 6, 6, 6, 8, 8, 8], + dtype=torch.float32).reshape(batch_size, seq_len, hidden_size) + + # Generate gradient of output + dy = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate statistics from forward pass + alpha = 0.3 + x_add = x * alpha + gx + mean = x_add.mean(-1, keepdim=True) + diff = x_add - mean + variance = diff.pow(2).mean(-1, keepdim=True) + 1e-6 + rstd = 1.0 / torch.sqrt(variance) + + # Generate gamma parameter + gamma = torch.tensor([0, 1, 2, 3], dtype=torch.float32) + + return [dy, x, gx, gamma, mean, rstd] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [0.3] # alpha=0.3 diff --git a/aikg/benchmark/aikgbench/llm/norm/DuaQuantizeAddLayerNorm.py b/aikg/benchmark/aikgbench/llm/norm/DuaQuantizeAddLayerNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..2b6d18b185722530cb09e29bb656be73ad8ee623 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/DuaQuantizeAddLayerNorm.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs DuaQuantizeAddLayerNorm operation. + Based on + """ + + def __init__(self, dtype=torch.float32, axis=-1, epsilon=1e-6, additional_output=False): + super(Model, self).__init__() + self.dtype = dtype + self.axis = axis + self.epsilon = epsilon + self.additional_output = additional_output + + def forward(self, x1, x2, gamma, beta, bias, scales1, scales2, zero_points1, zero_points2): + """ + Perform DuaQuantizeAddLayerNorm operation. 
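+        Applies the AddLayerNorm computation and then quantizes the result with
+        two independent scale/zero-point pairs, producing two int8 outputs.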
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + bias: Bias tensor + scales1: Quantization scales for first output + scales2: Quantization scales for second output + zero_points1: Quantization zero points for first output + zero_points2: Quantization zero points for second output + + Returns: + Tuple of (y1, y2, x) where: + - y1, y2 are quantized outputs + - x is the sum of input tensors + """ + # Add the two input tensors + if bias is not None: + x = x1 + x2 + bias + else: + x = x1 + x2 + + # Get input shape and reshape for layer norm + input_shape = x.shape + row_size = x.shape[-1] + row_count = 1 + for i in range(0, len(input_shape) - 1): + row_count *= input_shape[i] + + x_shape = (row_count, row_size) + x_mean_shape = (row_count, 1) + + # Reshape for layer norm computation + x_reshaped = x.reshape(x_shape) + + # Compute mean and variance + x_mean = torch.mean(x_reshaped, dim=1, keepdim=True) + x_var = torch.var(x_reshaped, dim=1, keepdim=True, + unbiased=False) + self.epsilon + x_rstd = 1.0 / torch.sqrt(x_var) + + # Broadcast tensors to match x_shape + x_mean_broadcast = x_mean.expand(x_shape) + x_rstd_broadcast = x_rstd.expand(x_shape) + gamma_broadcast = gamma.expand(x_shape) + beta_broadcast = beta.expand(x_shape) + + # Apply layer normalization + y = torch.multiply(torch.multiply( + x_reshaped - x_mean_broadcast, x_rstd_broadcast), gamma_broadcast) + beta_broadcast + + # Reshape back to original shape + y = y.reshape(input_shape) + + # Quantize outputs + y1_quantized = torch.round( + y / scales1 + zero_points1).clamp(-128, 127).to(torch.int8) + y2_quantized = torch.round( + y / scales2 + zero_points2).clamp(-128, 127).to(torch.int8) + + return y1_quantized, y2_quantized, x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other layer norm operations + batch_size, seq_len, hidden_size = 1, 2, 8 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma and beta parameters + gamma = torch.randn(hidden_size, dtype=torch.float32) + beta = torch.randn(hidden_size, dtype=torch.float32) + + # Generate bias + bias = torch.randn(hidden_size, dtype=torch.float32) + + # Generate quantization parameters + scales1 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + scales2 = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + zero_points1 = torch.zeros(1, dtype=torch.float32) + zero_points2 = torch.zeros(1, dtype=torch.float32) + + return [x1, x2, gamma, beta, bias, scales1, scales2, zero_points1, zero_points2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [torch.float32, -1, 1e-6, False] # dtype=float32, axis=-1, epsilon=1e-6, additional_output=False diff --git a/aikg/benchmark/aikgbench/llm/norm/GroupNormGrad.py b/aikg/benchmark/aikgbench/llm/norm/GroupNormGrad.py new file mode 100644 index 0000000000000000000000000000000000000000..da943c3230cf095dd6d4fe04c721f4c223018789 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/GroupNormGrad.py @@ -0,0 +1,110 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs GroupNormGrad operation. 
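+    Simplified backward pass for group normalization, returning the gradients
+    dx, dgamma and dbeta.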
+    Based on
+    """
+
+    def __init__(self, num_groups=2, data_format="NCHW", dx_is_require=True, dgamma_is_require=True, dbeta_is_require=True):
+        super(Model, self).__init__()
+        self.num_groups = num_groups
+        self.data_format = data_format
+        self.dx_is_require = dx_is_require
+        self.dgamma_is_require = dgamma_is_require
+        self.dbeta_is_require = dbeta_is_require
+
+    def forward(self, dy, mean, rstd, x, gamma):
+        """
+        Perform GroupNormGrad operation (backward pass).
+
+        Args:
+            dy: Gradient of output
+            mean: Mean from forward pass
+            rstd: Reciprocal standard deviation from forward pass
+            x: Input tensor from forward pass
+            gamma: Scale parameter tensor
+
+        Returns:
+            Tuple of (dx, dgamma, dbeta) where:
+            - dx is the gradient with respect to input
+            - dgamma is the gradient with respect to gamma
+            - dbeta is the gradient with respect to beta
+        """
+        # Get input dimensions
+        N, C = x.shape[:2]
+        remaining_dims = x.shape[2:]
+        HxW = 1
+        for size in remaining_dims:
+            HxW *= size
+
+        # Reshape for group norm computation
+        x_reshaped = x.reshape(N, self.num_groups, C // self.num_groups, HxW)
+        dy_reshaped = dy.reshape(N, self.num_groups, C // self.num_groups, HxW)
+
+        # Compute gradients for group norm backward
+        # This is a simplified implementation of the backward pass
+        if self.dgamma_is_require:
+            dgamma = torch.sum(dy * (x - mean) * rstd, dim=(0, 2, 3))
+        else:
+            dgamma = torch.zeros_like(gamma)
+
+        if self.dbeta_is_require:
+            dbeta = torch.sum(dy, dim=(0, 2, 3))
+        else:
+            dbeta = torch.zeros_like(gamma)
+
+        # Compute gradient with respect to input
+        if self.dx_is_require:
+            dx = dy * gamma.view(1, C, 1, 1) * rstd
+        else:
+            dx = torch.zeros_like(x)
+
+        return dx, dgamma, dbeta
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    Based on
+    """
+    # Use similar shapes as other group norm operations
+    batch_size, channels, height, width = 4, 2, 8, 8
+
+    # Generate input tensor
+    x = torch.randn(batch_size, channels, height, width, dtype=torch.float32)
+
+    # Generate gradient of output
+    dy = torch.randn(batch_size, channels, height, width, dtype=torch.float32)
+
+    # Generate statistics from forward pass
+    N, C = x.shape[:2]
+    remaining_dims = x.shape[2:]
+    HxW = 1
+    for size in remaining_dims:
+        HxW *= size
+
+    x_reshaped = x.reshape(N, 2, C // 2, HxW)  # num_groups=2
+    mean = torch.mean(x_reshaped, dim=(2, 3), keepdim=True)
+    var = torch.var(x_reshaped, dim=(2, 3),
+                    keepdim=True, unbiased=False) + 1e-6
+    rstd = 1.0 / torch.sqrt(var)
+
+    # Broadcast the per-group statistics back to the input shape
+    mean = mean.expand(N, 2, C // 2, HxW).reshape(x.shape)
+    rstd = rstd.expand(N, 2, C // 2, HxW).reshape(x.shape)
+
+    # Generate gamma parameter
+    gamma = torch.randn(channels, dtype=torch.float32)
+
+    return [dy, mean, rstd, x, gamma]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    Based on parameters
+    """
+    return [2, "NCHW", True, True, True]  # num_groups=2, data_format="NCHW", dx_is_require=True, dgamma_is_require=True, dbeta_is_require=True
diff --git a/aikg/benchmark/aikgbench/llm/norm/GroupNormSilu.py b/aikg/benchmark/aikgbench/llm/norm/GroupNormSilu.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f0108f069434210d1e007ba27e5aeafe8b06b2b
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/GroupNormSilu.py
@@ -0,0 +1,83 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs GroupNormSilu operation.
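+    Applies group normalization via torch.ops.aten.native_group_norm and
+    optionally follows it with a SiLU activation (x * sigmoid(x)).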
+ Based on + """ + + def __init__(self, num_groups=2, epsilon=1e-6, activate_silu=True): + super(Model, self).__init__() + self.num_groups = num_groups + self.epsilon = epsilon + self.activate_silu = activate_silu + + def forward(self, x, gamma, beta): + """ + Perform GroupNormSilu operation. + + Args: + x: Input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + + Returns: + Tuple of (output, mean_out, rstd_out) where: + - output is the normalized and silu-activated tensor + - mean_out is the mean of each group + - rstd_out is the reciprocal standard deviation of each group + """ + # Get input dimensions + N, C = x.shape[:2] + remaining_dims = x.shape[2:] + HxW = 1 + for size in remaining_dims: + HxW *= size + + # Use PyTorch's native group norm + output, mean_out, rstd_out = torch.ops.aten.native_group_norm( + input=x, + weight=gamma, + bias=beta, + N=N, + C=C, + HxW=HxW, + group=self.num_groups, + eps=self.epsilon + ) + + # Apply SiLU activation if enabled + if self.activate_silu: + sigmoid_out = 1 / (1 + torch.exp(-output)) + output = output * sigmoid_out + + return output, mean_out, rstd_out + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use the same shapes as in gen_data.py + batch_size, channels, height, width = 4, 2, 8, 8 + + # Generate input tensor + x = torch.rand(batch_size, channels, height, width, + dtype=torch.float32) * 0.9 + 0.1 + + # Generate gamma and beta parameters + gamma = torch.rand(channels, dtype=torch.float32) * 0.9 + 0.1 + beta = torch.rand(channels, dtype=torch.float32) * 0.9 + 0.1 + + return [x, gamma, beta] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [2, 1e-6, True] # num_groups=2, epsilon=1e-6, activate_silu=True diff --git a/aikg/benchmark/aikgbench/llm/norm/GroupNormSwish.py b/aikg/benchmark/aikgbench/llm/norm/GroupNormSwish.py new file mode 100644 index 0000000000000000000000000000000000000000..86336b6cafbf3b2821affe17197b8ab61ef3823e --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/GroupNormSwish.py @@ -0,0 +1,84 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs GroupNormSwish operation. + Based on + """ + + def __init__(self, num_groups=8, epsilon=1e-5, activate_swish=True, swish_scale=1.0): + super(Model, self).__init__() + self.num_groups = num_groups + self.epsilon = epsilon + self.activate_swish = activate_swish + self.swish_scale = swish_scale + + def forward(self, x, gamma, beta): + """ + Perform GroupNormSwish operation. + + Args: + x: Input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + + Returns: + Tuple of (output, mean_out, rstd_out) where: + - output is the normalized and swish-activated tensor + - mean_out is the mean of each group + - rstd_out is the reciprocal standard deviation of each group + """ + # Get input dimensions + N, C = x.shape[:2] + remaining_dims = x.shape[2:] + HxW = 1 + for size in remaining_dims: + HxW *= size + + # Use PyTorch's native group norm + output, mean_out, rstd_out = torch.ops.aten.native_group_norm( + input=x, + weight=gamma, + bias=beta, + N=N, + C=C, + HxW=HxW, + group=self.num_groups, + eps=self.epsilon + ) + + # Apply Swish activation if enabled + if self.activate_swish: + sigmoid_out = 1 / (1 + torch.exp(-self.swish_scale * output)) + output = output * sigmoid_out + + return output, mean_out, rstd_out + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+ Based on + """ + # Use the same shapes as in gen_data.py + N, C = 100, 32 + x_shape = (N, C) + + # Generate input tensor + x = torch.rand(x_shape, dtype=torch.float16) + + # Generate gamma and beta parameters + gamma = torch.rand(C, dtype=torch.float16) + beta = torch.rand(C, dtype=torch.float16) + + return [x, gamma, beta] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [8, 1e-5, True, 1.0] # num_groups=8, epsilon=1e-5, activate_swish=True, swish_scale=1.0 diff --git a/aikg/benchmark/aikgbench/llm/norm/GroupNormSwishGrad.py b/aikg/benchmark/aikgbench/llm/norm/GroupNormSwishGrad.py new file mode 100644 index 0000000000000000000000000000000000000000..447998e7932588b5e4f839860509f3b5f095fae2 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/GroupNormSwishGrad.py @@ -0,0 +1,123 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs GroupNormSwishGrad operation. + Based on + """ + + def __init__(self, num_groups=2, data_format="NCHW", swish_scale=1.0, dgamma_is_require=True, dbeta_is_require=True): + super(Model, self).__init__() + self.num_groups = num_groups + self.data_format = data_format + self.swish_scale = swish_scale + self.dgamma_is_require = dgamma_is_require + self.dbeta_is_require = dbeta_is_require + + def forward(self, dy, mean, rstd, x, gamma, beta): + """ + Perform GroupNormSwishGrad operation (backward pass). + + Args: + dy: Gradient of output + mean: Mean from forward pass + rstd: Reciprocal standard deviation from forward pass + x: Input tensor from forward pass + gamma: Scale parameter tensor + beta: Shift parameter tensor + + Returns: + Tuple of (dx_out, dgamma_out, dbeta_out) where: + - dx_out is the gradient with respect to input + - dgamma_out is the gradient with respect to gamma + - dbeta_out is the gradient with respect to beta + """ + # Get input dimensions + N, C = x.shape[:2] + remaining_dims = x.shape[2:] + HxW = 1 + for size in remaining_dims: + HxW *= size + + # Reshape for group norm computation + x_reshaped = x.reshape(N, self.num_groups, C // self.num_groups, HxW) + + # Apply group normalization + x_normalized = (x_reshaped - mean) * rstd + output = x_normalized * gamma + beta + + # Apply Swish activation + sigmoid_out = 1 / (1 + torch.exp(-self.swish_scale * output)) + swish_output = output * sigmoid_out + + # Compute gradients for Swish backward + # d(swish_output)/d(output) = sigmoid_out + output * sigmoid_out * (1 - sigmoid_out) * swish_scale + swish_grad = sigmoid_out + swish_output * \ + (1 - sigmoid_out) * self.swish_scale + + # Apply gradient through Swish + dy_swish = dy * swish_grad + + # Compute gradients for group norm backward + if self.dgamma_is_require: + dgamma_out = torch.sum(dy_swish * x_normalized, dim=0) + else: + dgamma_out = torch.zeros_like(gamma) + + if self.dbeta_is_require: + dbeta_out = torch.sum(dy_swish, dim=0) + else: + dbeta_out = torch.zeros_like(beta) + + # Compute gradient with respect to input + dx_out = dy_swish * gamma * rstd + + return dx_out, dgamma_out, dbeta_out + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+    Based on
+    """
+    # Use similar shapes as other group norm operations
+    batch_size, channels, height, width = 4, 2, 8, 8
+
+    # Generate input tensor
+    x = torch.randn(batch_size, channels, height, width, dtype=torch.float32)
+
+    # Generate gradient of output
+    dy = torch.randn(batch_size, channels, height, width, dtype=torch.float32)
+
+    # Generate statistics from forward pass
+    N, C = x.shape[:2]
+    remaining_dims = x.shape[2:]
+    HxW = 1
+    for size in remaining_dims:
+        HxW *= size
+
+    x_reshaped = x.reshape(N, 2, C // 2, HxW)  # num_groups=2
+    mean = torch.mean(x_reshaped, dim=(2, 3), keepdim=True)
+    var = torch.var(x_reshaped, dim=(2, 3),
+                    keepdim=True, unbiased=False) + 1e-6
+    rstd = 1.0 / torch.sqrt(var)
+
+    # Keep the per-group statistics shape (N, num_groups, 1, 1) expected by forward
+
+    # Generate gamma and beta parameters
+    gamma = torch.randn(channels, dtype=torch.float32)
+    beta = torch.randn(channels, dtype=torch.float32)
+
+    return [dy, mean, rstd, x, gamma, beta]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    Based on parameters
+    """
+    return [2, "NCHW", 1.0, True, True]  # num_groups=2, data_format="NCHW", swish_scale=1.0, dgamma_is_require=True, dbeta_is_require=True
diff --git a/aikg/benchmark/aikgbench/llm/norm/InplaceAddLayerNorm.py b/aikg/benchmark/aikgbench/llm/norm/InplaceAddLayerNorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4a47d5f977df4f7d549da1ee6a869249d0a8b60
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/InplaceAddLayerNorm.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs InplaceAddLayerNorm operation.
+    Based on
+    """
+
+    def __init__(self, epsilon=1e-6, additional_output=False):
+        super(Model, self).__init__()
+        self.epsilon = epsilon
+        self.additional_output = additional_output
+
+    def forward(self, x1, x2, gamma, beta, bias=None):
+        """
+        Perform InplaceAddLayerNorm operation.
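+        Computes x = x1 + x2 (+ bias) and then applies layer normalization
+        over the last dimension: y = (x - mean) * rstd * gamma + beta.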
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + bias: Optional bias tensor + + Returns: + Tuple of (y, mean, rstd, x) if additional_output=True + Otherwise just the output tensor + """ + # Add the two input tensors (inplace operation simulated) + if bias is not None: + x = x1 + x2 + bias + else: + x = x1 + x2 + + # Get input shape and reshape for layer norm + input_shape = x.shape + row_size = x.shape[-1] + row_count = 1 + for i in range(0, len(input_shape) - 1): + row_count *= input_shape[i] + + x_shape = (row_count, row_size) + x_mean_shape = (row_count, 1) + + # Reshape for layer norm computation + x_reshaped = x.reshape(x_shape) + + # Compute mean and variance + x_mean = torch.mean(x_reshaped, dim=1, keepdim=True) + x_var = torch.var(x_reshaped, dim=1, keepdim=True, + unbiased=False) + self.epsilon + x_rstd = 1.0 / torch.sqrt(x_var) + + # Broadcast tensors to match x_shape + x_mean_broadcast = x_mean.expand(x_shape) + x_rstd_broadcast = x_rstd.expand(x_shape) + gamma_broadcast = gamma.expand(x_shape) + beta_broadcast = beta.expand(x_shape) + + # Apply layer normalization + y = torch.multiply(torch.multiply( + x_reshaped - x_mean_broadcast, x_rstd_broadcast), gamma_broadcast) + beta_broadcast + + # Reshape back to original shape + y = y.reshape(input_shape) + x_mean = x_mean.reshape(input_shape[:-1] + (1,)) + x_rstd = x_rstd.reshape(input_shape[:-1] + (1,)) + + if self.additional_output: + return y, x_mean, x_rstd, x + else: + return y + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other layer norm operations + batch_size, seq_len, hidden_size = 1, 2, 8 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma and beta parameters + gamma = torch.randn(hidden_size, dtype=torch.float32) + beta = torch.randn(hidden_size, dtype=torch.float32) + + # Generate optional bias + bias = torch.randn(hidden_size, dtype=torch.float32) + + return [x1, x2, gamma, beta, bias] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [1e-6, True] # epsilon=1e-6, additional_output=True diff --git a/aikg/benchmark/aikgbench/llm/norm/InplaceAddRmsNorm.py b/aikg/benchmark/aikgbench/llm/norm/InplaceAddRmsNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..e416468c7ef48f921e78dfd5b4e228f9b82049fb --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/InplaceAddRmsNorm.py @@ -0,0 +1,71 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs InplaceAddRmsNorm operation. + Based on + """ + + def __init__(self, epsilon=1e-6): + super(Model, self).__init__() + self.epsilon = epsilon + + def forward(self, x1, x2, gamma): + """ + Perform InplaceAddRmsNorm operation. 
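+        Computes x = x1 + x2 and then applies RMS normalization over the
+        last dimension: y = x / sqrt(mean(x^2) + epsilon) * gamma.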
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + + Returns: + Tuple of (output, rstd, x) where: + - output is the normalized tensor + - rstd is the reciprocal standard deviation + - x is the sum of input tensors + """ + # Add the two input tensors (inplace operation simulated) + x = x1 + x2 + + # Compute RMS (Root Mean Square) normalization + # Unlike LayerNorm, RmsNorm doesn't subtract the mean + x_squared = x.pow(2) + x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + self.epsilon) + x_rstd = 1.0 / x_rms + + # Apply normalization + x_normalized = x * x_rstd + + # Apply scale parameter + output = x_normalized * gamma + + return output, x_rstd, x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other rms norm operations + batch_size, seq_len, hidden_size = 2, 1, 16 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma parameter + gamma = torch.randn(hidden_size, dtype=torch.float32) + + return [x1, x2, gamma] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [1e-6] # epsilon=1e-6 diff --git a/aikg/benchmark/aikgbench/llm/norm/InstanceNormV3.py b/aikg/benchmark/aikgbench/llm/norm/InstanceNormV3.py new file mode 100644 index 0000000000000000000000000000000000000000..32d67005e0eefe46cb308a0ee765af989e1bc136 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/InstanceNormV3.py @@ -0,0 +1,80 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs InstanceNormV3 operation. + Based on + """ + + def __init__(self, epsilon=1e-5, data_format="NCHW"): + super(Model, self).__init__() + self.epsilon = epsilon + self.data_format = data_format + + def forward(self, x, gamma, beta): + """ + Perform InstanceNormV3 operation. + + Args: + x: Input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + + Returns: + Tuple of (output, mean, variance) where: + - output is the normalized tensor + - mean is the mean of each instance + - variance is the variance of each instance + """ + # Determine reduction axes based on data format + if self.data_format == 'NHWC': + reduce_axis = [1, 2] + gamma = gamma.reshape([1, 1, 1, gamma.shape[0]]) + beta = beta.reshape([1, 1, 1, beta.shape[0]]) + else: # NCHW + reduce_axis = [2, 3] + gamma = gamma.reshape([1, gamma.shape[0], 1, 1]) + beta = beta.reshape([1, beta.shape[0], 1, 1]) + + # Compute mean and variance + mean = torch.mean(x, dim=reduce_axis, keepdim=True) + var = torch.mean(torch.pow((x - mean), 2), + dim=reduce_axis, keepdim=True) + + # Compute reciprocal standard deviation + rstd = 1 / torch.sqrt(var + self.epsilon) + + # Apply normalization + tmp_x = (x - mean) * rstd + output = tmp_x * gamma + beta + + return output, mean, var + + +def get_inputs(): + """ + Generate random input tensors for testing. 
+    Based on
+    """
+    # Use the same shapes as in gen_data.py
+    batch_size, channels, height, width = 1, 8, 4, 4
+
+    # Generate input tensor
+    x = torch.ones(batch_size, channels, height,
+                   width, dtype=torch.float32) * 0.77
+
+    # Generate gamma and beta parameters
+    gamma = torch.ones(channels, dtype=torch.float32) * 1.5
+    beta = torch.ones(channels, dtype=torch.float32) * 0.5
+
+    return [x, gamma, beta]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    Based on parameters
+    """
+    return [1e-5, "NCHW"]  # epsilon=1e-5, data_format="NCHW"
diff --git a/aikg/benchmark/aikgbench/llm/norm/LayerNormGradV3.py b/aikg/benchmark/aikgbench/llm/norm/LayerNormGradV3.py
new file mode 100644
index 0000000000000000000000000000000000000000..fec7536ac2a7eea5449d7ae2a0fbddc68fff9c11
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/LayerNormGradV3.py
@@ -0,0 +1,76 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs LayerNormGradV3 operation.
+    Based on
+    """
+
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, dy, x, rstd, mean, gamma):
+        """
+        Perform LayerNormGradV3 operation (backward pass).
+
+        Args:
+            dy: Gradient of output
+            x: Input tensor from forward pass
+            rstd: Reciprocal standard deviation from forward pass
+            mean: Mean from forward pass
+            gamma: Scale parameter tensor
+
+        Returns:
+            Tuple of (pd_x, pd_gamma, pd_beta) where:
+            - pd_x is the gradient with respect to input
+            - pd_gamma is the gradient with respect to gamma
+            - pd_beta is the gradient with respect to beta
+        """
+        # Compute gradients for layer norm backward
+        # This is a simplified implementation of the backward pass
+
+        # Gradient with respect to gamma (reduce over all dims except the last
+        # so the result matches gamma's shape)
+        reduce_dims = tuple(range(dy.dim() - 1))
+        pd_gamma = torch.sum(dy * (x - mean) * rstd, dim=reduce_dims)
+
+        # Gradient with respect to beta
+        pd_beta = torch.sum(dy, dim=reduce_dims)
+
+        # Gradient with respect to input
+        pd_x = dy * gamma * rstd
+
+        return pd_x, pd_gamma, pd_beta
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    Based on
+    """
+    # Use similar shapes as other layer norm operations
+    batch_size, seq_len, hidden_size = 1, 2, 32
+
+    # Generate input tensor
+    x = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate gradient of output
+    dy = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate statistics from forward pass
+    mean = torch.mean(x, dim=-1, keepdim=True)
+    var = torch.var(x, dim=-1, keepdim=True, unbiased=False) + 1e-6
+    rstd = 1.0 / torch.sqrt(var)
+
+    # Generate gamma parameter
+    gamma = torch.randn(hidden_size, dtype=torch.float32)
+
+    return [dy, x, rstd, mean, gamma]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    For gradient operations, no specific initialization parameters are needed.
+    """
+    return []
diff --git a/aikg/benchmark/aikgbench/llm/norm/LayerNormV4.py b/aikg/benchmark/aikgbench/llm/norm/LayerNormV4.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e01458b3f5abbf8d9299565c1b109e80883a474
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/LayerNormV4.py
@@ -0,0 +1,68 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs LayerNormV4 operation.
+    Based on
+    """
+
+    def __init__(self, epsilon=1e-5):
+        super(Model, self).__init__()
+        self.epsilon = epsilon
+
+    def forward(self, x, normalized_shape, gamma, beta):
+        """
+        Perform LayerNormV4 operation.
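+        Normalizes over normalized_shape:
+        y = (x - mean) / sqrt(var + epsilon) * gamma + beta.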
+
+        Args:
+            x: Input tensor
+            normalized_shape: Shape of the normalized dimensions
+            gamma: Scale parameter tensor
+            beta: Shift parameter tensor
+
+        Returns:
+            Tuple of (output, mean, rstd) where:
+            - output is the normalized tensor
+            - mean is the mean of the normalized dimensions
+            - rstd is the reciprocal standard deviation
+        """
+        # Use PyTorch's native layer norm
+        # native_layer_norm already returns the reciprocal standard deviation
+        output, mean, rstd = torch.ops.aten.native_layer_norm(
+            x, normalized_shape, gamma, beta, eps=self.epsilon
+        )
+
+        return output, mean, rstd
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    Based on
+    """
+    # Use the same shapes as in gen_data.py
+    batch_size, seq_len, hidden_size = 1, 2, 32
+
+    # Generate input tensor
+    x = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate normalized_shape (same as gamma shape)
+    normalized_shape = (hidden_size,)
+
+    # Generate gamma and beta parameters
+    gamma = torch.ones(hidden_size, dtype=torch.float32)
+    beta = torch.zeros(hidden_size, dtype=torch.float32)
+
+    return [x, normalized_shape, gamma, beta]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    Based on parameters
+    """
+    return [1e-5]  # epsilon=1e-5
diff --git a/aikg/benchmark/aikgbench/llm/norm/QuantizeAddLayerNorm.py b/aikg/benchmark/aikgbench/llm/norm/QuantizeAddLayerNorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..a690077d52d35e345587976a542e37538f46db38
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/QuantizeAddLayerNorm.py
@@ -0,0 +1,112 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs QuantizeAddLayerNorm operation.
+    Based on
+    """
+
+    def __init__(self, dtype=torch.float32, axis=-1, epsilon=1e-6, additional_output=False):
+        super(Model, self).__init__()
+        self.dtype = dtype
+        self.axis = axis
+        self.epsilon = epsilon
+        self.additional_output = additional_output
+
+    def forward(self, x1, x2, gamma, beta, bias, scales, zero_points):
+        """
+        Perform QuantizeAddLayerNorm operation.
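+        Computes x = x1 + x2 (+ bias), applies layer normalization over the
+        last dimension, then quantizes the result to int8 with
+        y = clamp(round(y_norm / scales + zero_points), -128, 127).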
+ + Args: + x1: First input tensor + x2: Second input tensor + gamma: Scale parameter tensor + beta: Shift parameter tensor + bias: Bias tensor + scales: Quantization scales + zero_points: Quantization zero points + + Returns: + Tuple of (y, x) where: + - y is the quantized output + - x is the sum of input tensors + """ + # Add the two input tensors + if bias is not None: + x = x1 + x2 + bias + else: + x = x1 + x2 + + # Get input shape and reshape for layer norm + input_shape = x.shape + row_size = x.shape[-1] + row_count = 1 + for i in range(0, len(input_shape) - 1): + row_count *= input_shape[i] + + x_shape = (row_count, row_size) + x_mean_shape = (row_count, 1) + + # Reshape for layer norm computation + x_reshaped = x.reshape(x_shape) + + # Compute mean and variance + x_mean = torch.mean(x_reshaped, dim=1, keepdim=True) + x_var = torch.var(x_reshaped, dim=1, keepdim=True, + unbiased=False) + self.epsilon + x_rstd = 1.0 / torch.sqrt(x_var) + + # Broadcast tensors to match x_shape + x_mean_broadcast = x_mean.expand(x_shape) + x_rstd_broadcast = x_rstd.expand(x_shape) + gamma_broadcast = gamma.expand(x_shape) + beta_broadcast = beta.expand(x_shape) + + # Apply layer normalization + y = torch.multiply(torch.multiply( + x_reshaped - x_mean_broadcast, x_rstd_broadcast), gamma_broadcast) + beta_broadcast + + # Reshape back to original shape + y = y.reshape(input_shape) + + # Quantize output + y_quantized = torch.round( + y / scales + zero_points).clamp(-128, 127).to(torch.int8) + + return y_quantized, x + + +def get_inputs(): + """ + Generate random input tensors for testing. + Based on + """ + # Use similar shapes as other layer norm operations + batch_size, seq_len, hidden_size = 1, 2, 8 + + # Generate input tensors + x1 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + x2 = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32) + + # Generate gamma and beta parameters + gamma = torch.randn(hidden_size, dtype=torch.float32) + beta = torch.randn(hidden_size, dtype=torch.float32) + + # Generate bias + bias = torch.randn(hidden_size, dtype=torch.float32) + + # Generate quantization parameters + scales = torch.rand(1, dtype=torch.float32) * 0.1 + 0.01 + zero_points = torch.zeros(1, dtype=torch.float32) + + return [x1, x2, gamma, beta, bias, scales, zero_points] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + Based on parameters + """ + return [torch.float32, -1, 1e-6, False] # dtype=float32, axis=-1, epsilon=1e-6, additional_output=False diff --git a/aikg/benchmark/aikgbench/llm/norm/RmsNorm.py b/aikg/benchmark/aikgbench/llm/norm/RmsNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..a621ac569c3eaa84a6c5e749fa3e22db58d5760d --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/norm/RmsNorm.py @@ -0,0 +1,60 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs RmsNorm operation. + """ + + def __init__(self, epsilon=1e-6): + super(Model, self).__init__() + self.epsilon = epsilon + + def forward(self, x, gamma): + """ + Perform RmsNorm operation. 
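+        Computes y = x / sqrt(mean(x^2, dim=-1) + epsilon) * gamma.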
+
+        Args:
+            x: Input tensor
+            gamma: Scale parameter tensor
+
+        Returns:
+            Tuple of (output, rstd) where output is the normalized tensor
+        """
+        # Compute RMS (Root Mean Square) normalization
+        # Unlike LayerNorm, RmsNorm doesn't subtract the mean
+        x_squared = x.pow(2)
+        x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + self.epsilon)
+        x_rstd = 1.0 / x_rms
+
+        # Apply normalization
+        x_normalized = x * x_rstd
+
+        # Apply scale parameter
+        output = x_normalized * gamma
+
+        return output, x_rstd
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    """
+    # Use the same shapes as in gen_data.py
+    batch_size, seq_len, hidden_size = 2, 1, 16
+
+    # Generate input tensor
+    x = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate gamma parameter
+    gamma = torch.randn(hidden_size, dtype=torch.float32)
+
+    return [x, gamma]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    """
+    return [1e-6]  # epsilon=1e-6
diff --git a/aikg/benchmark/aikgbench/llm/norm/RmsNormGrad.py b/aikg/benchmark/aikgbench/llm/norm/RmsNormGrad.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c240ce5a14c11137328350f43dcef2644f687c7
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/norm/RmsNormGrad.py
@@ -0,0 +1,70 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs RmsNormGrad operation.
+    """
+
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, dy, x, rstd, gamma):
+        """
+        Perform RmsNormGrad operation (backward pass).
+
+        Args:
+            dy: Gradient of output
+            x: Input tensor from forward pass
+            rstd: Reciprocal standard deviation from forward pass
+            gamma: Scale parameter tensor
+
+        Returns:
+            Tuple of (dx, dgamma) where:
+            - dx is the gradient with respect to input
+            - dgamma is the gradient with respect to gamma
+        """
+        # Compute gradients for RMS norm backward
+        # This is a simplified implementation of the backward pass
+
+        # Gradient with respect to gamma (reduce over all dims except the last
+        # so the result matches gamma's shape)
+        reduce_dims = tuple(range(dy.dim() - 1))
+        dgamma = torch.sum(dy * x * rstd, dim=reduce_dims)
+
+        # Gradient with respect to input
+        dx = dy * gamma * rstd
+
+        return dx, dgamma
+
+
+def get_inputs():
+    """
+    Generate random input tensors for testing.
+    Based on
+    """
+    # Use similar shapes as other rms norm operations
+    batch_size, seq_len, hidden_size = 2, 1, 16
+
+    # Generate input tensor
+    x = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate gradient of output
+    dy = torch.randn(batch_size, seq_len, hidden_size, dtype=torch.float32)
+
+    # Generate statistics from forward pass
+    x_squared = x.pow(2)
+    x_rms = torch.sqrt(x_squared.mean(dim=-1, keepdim=True) + 1e-6)
+    rstd = 1.0 / x_rms
+
+    # Generate gamma parameter
+    gamma = torch.randn(hidden_size, dtype=torch.float32)
+
+    return [dy, x, rstd, gamma]
+
+
+def get_init_inputs():
+    """
+    Return initialization parameters for the model.
+    For gradient operations, no specific initialization parameters are needed.
+    """
+    return []
diff --git a/aikg/benchmark/aikgbench/llm/quant/ElewiseDequantPerChannel.py b/aikg/benchmark/aikgbench/llm/quant/ElewiseDequantPerChannel.py
new file mode 100644
index 0000000000000000000000000000000000000000..2de17c4f314d0ba3d6aa292efc9ff35b82c3cc9c
--- /dev/null
+++ b/aikg/benchmark/aikgbench/llm/quant/ElewiseDequantPerChannel.py
@@ -0,0 +1,67 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Simple model that performs per-channel dequantization on input tensor.
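+    Dequantizes int8 input to float16 per channel:
+    out = (x - offset) * scale (or x * scale when offset is None),
+    with scale and offset broadcast along the last dimension.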
+ """ + + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, scale, offset=None): + """ + Perform per-channel dequantization on input tensor. + + Args: + x: Input int8 tensor + scale: Scale tensor for dequantization + offset: Offset tensor for dequantization (optional) + + Returns: + Dequantized float16 tensor + """ + # Convert to float32 for processing + x_float = x.to(torch.float32) + scale_float = scale.to(torch.float32) + + if offset is not None: + offset_float = offset.to(torch.float32) + # Dequantize with offset + x_dequant = (x_float - offset_float) * scale_float + else: + # Dequantize without offset + x_dequant = x_float * scale_float + + # Clip to float16 range and convert to float16 + out = torch.clamp(x_dequant, -65504, 65504) + out = out.to(torch.float16) + + return out + + +# Model parameters - using the same shape as in test_dequant_per_channel.py +shape = (10, 8192) # Default shape from test case +scale_shape = (8192,) # Scale shape +batch_size = 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_dequant_per_channel.py + """ + # Generate random tensors similar to test_dequant_per_channel.py (range [-5, 5]) + input0 = torch.randint(-5, 5, shape, dtype=torch.int8) + input1 = torch.rand(scale_shape) * 10 - 5 + input2 = torch.randint(-5, 5, scale_shape, dtype=torch.int8) + return [input0, input1, input2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For per-channel dequantization, no specific initialization parameters are needed. + """ + return [] diff --git a/aikg/benchmark/aikgbench/llm/quant/ElewiseDynamicQuant.py b/aikg/benchmark/aikgbench/llm/quant/ElewiseDynamicQuant.py new file mode 100644 index 0000000000000000000000000000000000000000..6b9d14980e7180d2807243712ef8a245bbe56c19 --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/quant/ElewiseDynamicQuant.py @@ -0,0 +1,86 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs dynamic quantization on input tensor. + """ + + def __init__(self, asymmetric=False): + super(Model, self).__init__() + self.asymmetric = asymmetric + + def forward(self, x): + """ + Perform dynamic quantization on input tensor. 
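+        Per-row (last dimension) quantization to int8. Symmetric mode uses
+        scale = max(|x|) / 127; asymmetric mode uses scale = (max - min) / 255
+        together with a per-row offset.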
+ + Args: + x: Input tensor (float16/float32/bfloat16) + + Returns: + Tuple of (quantized_tensor, scale, offset) + - quantized_tensor: int8 tensor + - scale: float32 tensor for scaling + - offset: float32 tensor for offset (only for asymmetric) + """ + # Convert to float32 for processing + x_float = x.to(torch.float32) + + if self.asymmetric: + # Asymmetric quantization + row_max = torch.max(x_float, dim=-1, keepdim=True)[0] + row_min = torch.min(x_float, dim=-1, keepdim=True)[0] + out_scale = (row_max - row_min) / 255.0 + out_offset = -(row_max + row_min) / (2.0 * out_scale) + + # Avoid division by zero + out_scale = torch.where(out_scale == 0, torch.tensor( + 1e-6, device=out_scale.device), out_scale) + + x_scaled = x_float / out_scale + x_offset = x_scaled + out_offset + x_clipped = torch.clamp(x_offset, -128, 127) + out_x = torch.round(x_clipped) + + return (out_x.to(torch.int8), + out_scale.squeeze(-1).to(torch.float32), + out_offset.squeeze(-1).to(torch.float32)) + else: + # Symmetric quantization + input_abs = torch.abs(x_float) + scale = torch.max(input_abs, dim=-1, keepdim=True)[0] + out_scale = scale / 127.0 + + # Avoid division by zero + out_scale = torch.where(out_scale == 0, torch.tensor( + 1e-6, device=out_scale.device), out_scale) + + x_scaled = x_float * 127.0 / scale + out_x = torch.round(x_scaled) + + return (out_x.to(torch.int8), + out_scale.squeeze(-1).to(torch.float32)) + + +# Model parameters - using the same shape as in test_dynamic_quant.py +shape = (2, 32, 32) # Default shape from test case +batch_size = 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_dynamic_quant.py + """ + # Generate random tensors similar to test_dynamic_quant.py (range [-5, 10]) + input0 = torch.rand(shape) * 15 - 5 + return [input0] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For dynamic quantization, we need the asymmetric flag. + """ + return [False] # Default to symmetric quantization diff --git a/aikg/benchmark/aikgbench/llm/quant/ElewiseQuantPerChannel.py b/aikg/benchmark/aikgbench/llm/quant/ElewiseQuantPerChannel.py new file mode 100644 index 0000000000000000000000000000000000000000..702fe3e76314f0cfdfb362b549fd56a27aa4b42d --- /dev/null +++ b/aikg/benchmark/aikgbench/llm/quant/ElewiseQuantPerChannel.py @@ -0,0 +1,74 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + """ + Simple model that performs per-channel quantization on input tensor. + """ + + def __init__(self, min_neg_127=False): + super(Model, self).__init__() + self.min_neg_127 = min_neg_127 + + def forward(self, x, scale, offset=None): + """ + Perform per-channel quantization on input tensor. 
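+        Computes out = clamp(round(x / scale) + offset, lower_bound, 127) as int8,
+        where lower_bound is -127 or -128 (per min_neg_127) and scale/offset
+        broadcast along the last dimension.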
+ + Args: + x: Input tensor (float16/float32/bfloat16) + scale: Scale tensor for quantization + offset: Offset tensor for quantization (optional) + + Returns: + Quantized int8 tensor + """ + # Convert to float32 for processing + x_float = x.to(torch.float32) + scale_float = scale.to(torch.float32) + + # Set lower bound based on configuration + int8_lower_bound = -127 if self.min_neg_127 else -128 + + # Avoid division by zero + scale_safe = torch.where(scale_float == 0, torch.tensor( + 1e-6, device=scale_float.device), scale_float) + + # Quantize + x_scaled = x_float / scale_safe + x_rounded = torch.round(x_scaled) + + if offset is not None: + offset_float = offset.to(torch.float32) + x_rounded = x_rounded + offset_float + + # Clip to int8 range + out = torch.clamp(x_rounded, int8_lower_bound, 127) + + return out.to(torch.int8) + + +# Model parameters - using the same shape as in test_quant_per_channel.py +shape = (10, 8192) # Default shape from test case +scale_shape = (8192,) # Scale shape +batch_size = 1 + + +def get_inputs(): + """ + Generate random input tensors for testing. + Returns tensors with different data types as tested in test_quant_per_channel.py + """ + # Generate random tensors similar to test_quant_per_channel.py (range [-5, 5]) + input0 = torch.rand(shape) * 10 - 5 + input1 = torch.rand(scale_shape) * 10 - 5 + input2 = torch.randint(-5, 5, scale_shape, dtype=torch.int8) + return [input0, input1, input2] + + +def get_init_inputs(): + """ + Return initialization parameters for the model. + For per-channel quantization, we need the min_neg_127 flag. + """ + return [False] # Default to -128 as lower bound
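For reference, a minimal driver sketch showing how these benchmark files are typically consumed: get_init_inputs() feeds the Model constructor and get_inputs() feeds forward(). The runner and the module path below are assumptions for illustration, not part of this diff.

# Minimal sketch; "aikgbench.llm.norm.RmsNorm" is a hypothetical import path.
import importlib

def run_once(module_name="aikgbench.llm.norm.RmsNorm"):
    mod = importlib.import_module(module_name)
    model = mod.Model(*mod.get_init_inputs())   # e.g. Model(1e-6) for RmsNorm
    outputs = model(*mod.get_inputs())          # positional tensors from get_inputs()
    return outputs

if __name__ == "__main__":
    out = run_once()
    print([o.shape for o in out] if isinstance(out, tuple) else out.shape)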