LEVSONGSW/DeepLearnLog · positionalEncoding.py
LEVSONGSW authored 2025-10-20 20:17 +08:00 · RoPE Position Encoding
# %%
import math
import torch.nn as nn
import torch
# %%
class positionalEncodingCosSin(nn.Module):
    '''
    Sinusoidal (sin/cos) positional encoding.
    @param:
    d_model: output dimension of the token embedding
    dropout: dropout probability; randomly dropping parts of the positional
        encoding improves generalization and forces the model to learn the
        contextual semantics on its own
    max_len: maximum number of tokens allowed, usually set to a power of 2
    '''
    def __init__(self, d_model, dropout, max_len=5000) -> None:
        super(positionalEncodingCosSin, self).__init__()
        self.dropout = nn.Dropout(dropout)
        # * Generate the full positional encoding table in one shot
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        # Equivalent to 10000 ^ (-2i / d_model)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000)) / d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        # Register as a buffer: a persistent, non-trainable tensor that takes part
        # in the forward pass and is saved as part of the model, but is never
        # updated by the optimizer
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x = x + self.pe[:, : x.size(1)].requires_grad_(False)  # * only needed if pe were not registered via register_buffer()
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)
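# %%
# Minimal usage sketch (shapes and hyperparameters are illustrative, not from
# the original file): add sinusoidal positions to a batch of token embeddings.
if __name__ == "__main__":
    pos_enc = positionalEncodingCosSin(d_model=512, dropout=0.1)
    x = torch.randn(2, 100, 512)  # (batch, seq_len, d_model)
    print(pos_enc(x).shape)       # torch.Size([2, 100, 512])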
# %%
class RelativePosition(nn.Module):
    '''
    Learnable relative position embeddings (in the style of Shaw et al., 2018):
    relative distances are clipped to [-max_relative_position, max_relative_position]
    and used to index a trainable embedding table.
    '''
    def __init__(self, num_units, max_relative_position):
        super().__init__()
        self.num_units = num_units
        self.max_relative_position = max_relative_position
        # One embedding per clipped relative distance in [-k, k], hence 2k + 1 rows
        self.embeddings_table = nn.Parameter(torch.Tensor(max_relative_position * 2 + 1, num_units))
        nn.init.xavier_uniform_(self.embeddings_table)

    def forward(self, length_q, length_k):
        range_vec_q = torch.arange(length_q)
        range_vec_k = torch.arange(length_k)
        # distance_mat[i, j] = j - i, the offset of key j relative to query i
        distance_mat = range_vec_k[None, :] - range_vec_q[:, None]
        distance_mat_clipped = torch.clamp(distance_mat, -self.max_relative_position, self.max_relative_position)
        # Shift into [0, 2k] so the offsets can index the embedding table
        final_mat = (distance_mat_clipped + self.max_relative_position).long()
        embeddings = self.embeddings_table[final_mat]
        return embeddings  # (length_q, length_k, num_units)
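# %%
# Minimal usage sketch (values are illustrative, not from the original file):
# relative-position embeddings for a 5x5 attention map, with distances clipped
# to the window [-2, 2].
if __name__ == "__main__":
    rel = RelativePosition(num_units=64, max_relative_position=2)
    print(rel(5, 5).shape)  # torch.Size([5, 5, 64])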
# %%
class RoPEPosition(nn.Module):
    def __init__(self, d_model, max_len=50000, theta=10000):
        super().__init__()
        self.d_model = d_model
        # Per-dimension rotation frequencies: 1 / theta ^ (2i / d_model)
        freqs = 1 / (theta ** (torch.arange(0, d_model, 2)[:d_model//2].float() / d_model))
        # * Generate a rotation angle for every token position
        t = torch.arange(max_len)
        freqs_for_each_token = torch.outer(t, freqs)  # tensor outer product: (max_len, d_model / 2)
        # * Convert to complex numbers in Cartesian form a + bj:
        # torch.ones_like(freqs_for_each_token) supplies the magnitude at each
        # position and freqs_for_each_token the angle, so each element becomes
        # 1 * cos(θ) + 1 * sin(θ)j
        freqs_cis = torch.polar(
            torch.ones_like(freqs_for_each_token),
            freqs_for_each_token
        )
        self.register_buffer("freqs_cis", freqs_cis)

    def forward(self, input, seq_len):
        # Reinterpret consecutive pairs of real features as complex numbers
        input_complex = torch.view_as_complex(
            input.float().reshape(*input.shape[:-1], -1, 2)
        )  # (batch_size, head, seq_len, d_model / 2)
        freqs_cis = self.freqs_cis[:seq_len]  # (seq_len, d_model / 2)
        # Complex multiplication rotates each feature pair by its position angle
        input_rotated = input_complex * freqs_cis.unsqueeze(0).unsqueeze(0)
        out = torch.view_as_real(input_rotated).flatten(-2)
        return out.type_as(input)
# %%
if __name__ == "__main__":
    rope = RoPEPosition(64)
    q = torch.randn(1, 8, 1024, 64)  # (batch, head, seq_len, head_dim)
    q_rope = rope(q, q.shape[-2])
    print(q_rope.shape)  # torch.Size([1, 8, 1024, 64])
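# %%
# Sanity-check sketch (not part of the original file): RoPE makes the q·k dot
# product depend only on the relative offset between the two positions, so
# embedding the same pair of vectors at offsets (1, 4) and (3, 6) should give
# numerically identical scores.
if __name__ == "__main__":
    rope = RoPEPosition(64)
    q_vec, k_vec = torch.randn(64), torch.randn(64)

    def score(pos_q, pos_k):
        seq = torch.zeros(1, 1, 8, 64)  # (batch, head, seq_len, head_dim)
        seq[0, 0, pos_q] = q_vec
        seq[0, 0, pos_k] = k_vec
        rot = rope(seq, 8)
        return rot[0, 0, pos_q] @ rot[0, 0, pos_k]

    print(torch.allclose(score(1, 4), score(3, 6), atol=1e-4))  # True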