验证中...
pythonData_4.txt
Raw Copy
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from pandas.plotting import scatter_matrix
# 绘制图
import matplotlib.pyplot as plt
#处理数据
def handleData(path='data_mro.csv', encoding=None):
    """Load the MRO CSV file and prepare it for analysis.

    Rows whose ``srsrp`` value is missing are dropped, the
    ``lteScPUSCHPRBNum`` column is removed, the index is renumbered from
    zero, and the ``label`` column is integer-encoded with scikit-learn's
    ``LabelEncoder`` into a new ``label_encoded`` column.

    Args:
        path: CSV file to read. Defaults to ``'data_mro.csv'``, the file
            this script has always read, so existing callers are unaffected.
        encoding: Text encoding forwarded to ``pandas.read_csv``. ``None``
            (the default) keeps pandas' own default encoding.

    Returns:
        The cleaned ``DataFrame`` including the ``label_encoded`` column.
    """
    # Load the source data; pass ``encoding`` when the CSV is not in
    # pandas' default encoding.
    mro = pd.read_csv(path, encoding=encoding)

    # Missing-value handling: drop rows without srsrp, drop the unused
    # column, then renumber the index so it is contiguous again.
    mro = mro.dropna(subset=['srsrp'])
    mro = mro.drop(['lteScPUSCHPRBNum'], axis=1)
    mro = mro.reset_index(drop=True)

    # Encode the non-numeric label column and attach the codes to the frame.
    # Item access is used instead of ``mro.label`` so the lookup can never
    # be shadowed by a DataFrame attribute of the same name.
    mro['label_encoded'] = LabelEncoder().fit_transform(mro['label'])
    return mro
def drawMRO(mro):
    """Plot the column distributions and the RSRP scatter matrix of ``mro``.

    Draws histograms (50 bins) via ``DataFrame.hist`` and a scatter matrix
    of ``srsrp`` together with ``nrsrp1`` .. ``nrsrp6``, then displays the
    figures with ``plt.show()``.

    Args:
        mro: DataFrame containing at least the seven RSRP columns above.
    """
    # Data distribution.
    mro.hist(figsize=(20, 20), bins=50)

    # Pairwise relationships between the srsrp column and the nrsrp columns.
    rsrp_columns = ['srsrp'] + ['nrsrp{}'.format(i) for i in range(1, 7)]
    rsrp_view = mro[rsrp_columns]
    scatter_matrix(rsrp_view, figsize=(30, 20))

    plt.show()
def seabornMRO(mro):
    """Show a clustered heat map of the correlation matrix of ``mro``.

    Each column of ``mro`` is assigned one colour from a seaborn HUSL
    palette (saturation 0.45); those colours annotate the rows and columns
    of a ``sns.clustermap`` of the correlation matrix, which is drawn with
    the ``vlag`` colour map centred on 0, line width 0.75 and a 23x23
    figure. The figure is displayed with ``plt.show()``.

    Args:
        mro: DataFrame to analyse. Only its numeric/boolean columns take
            part in the correlation matrix.
    """
    columns = mro.columns

    # 52 is the palette size this script has always used. It is kept as a
    # floor so frames with up to 52 columns get exactly the same colours as
    # before, and it grows only when there are more columns, which
    # previously left the surplus columns without a colour.
    palette = sns.husl_palette(max(52, len(columns)), s=0.45)

    # Map each column name to its colour, then build a Series indexed like
    # the frame's columns for use as the row/column colour annotation.
    network_lut = dict(zip(map(str, columns), palette))
    network_colors = pd.Series(columns, index=columns).map(network_lut)

    # Restrict to numeric/boolean columns explicitly: pandas >= 2.0 no
    # longer drops non-numeric columns (such as the string ``label``) in
    # ``DataFrame.corr`` and raises instead.
    corr = mro.select_dtypes(include=['number', 'bool']).corr()

    sns.clustermap(
        corr,
        center=0,
        cmap='vlag',
        row_colors=network_colors,
        col_colors=network_colors,
        linewidths=0.75,
        figsize=(23, 23),
    )
    plt.show()
def featuresMRO(mro):
    """Add engineered features to ``mro`` in place and print label correlations.

    New columns written to ``mro``:

    * ``max_nrsrp`` / ``min_nrsrp`` -- row-wise maximum / minimum of
      ``nrsrp1`` .. ``nrsrp6``.
    * ``diff_rsrp`` -- absolute difference between ``srsrp`` and
      ``max_nrsrp``.
    * ``same_mod3_cnt`` -- how many of ``npci1`` .. ``npci6`` equal
      ``spci`` modulo 3.
    * ``same_band_cnt`` -- how many of ``nearfcn1`` .. ``nearfcn6`` equal
      ``searfcn``.

    Missing values never compare equal, so they contribute 0 to both
    counts. Finally the correlation of every numeric column with
    ``label_encoded`` is printed in descending order.

    Args:
        mro: DataFrame holding the columns named above plus
            ``label_encoded``. It is modified in place.

    Returns:
        None.
    """
    neighbour_ids = range(1, 7)
    nrsrp_cols = ['nrsrp{}'.format(i) for i in neighbour_ids]
    npci_cols = ['npci{}'.format(i) for i in neighbour_ids]
    nearfcn_cols = ['nearfcn{}'.format(i) for i in neighbour_ids]

    # RSRP-derived features.
    neighbour_rsrp = mro[nrsrp_cols]
    mro['max_nrsrp'] = neighbour_rsrp.max(axis=1)
    mro['min_nrsrp'] = neighbour_rsrp.min(axis=1)
    mro['diff_rsrp'] = (mro['srsrp'] - mro['max_nrsrp']).abs()

    # Count of npci columns with the same PCI-mod-3 value as spci. Done as
    # whole-column comparisons rather than a per-row ``apply``: same
    # result, far faster, and it also works on an empty frame.
    same_mod3 = mro[npci_cols].mod(3).eq(mro['spci'].mod(3), axis=0)
    mro['same_mod3_cnt'] = same_mod3.sum(axis=1)

    # Count of nearfcn columns whose value equals searfcn.
    same_band = mro[nearfcn_cols].eq(mro['searfcn'], axis=0)
    mro['same_band_cnt'] = same_band.sum(axis=1)

    # Re-compute the correlation between the target and every feature.
    # Non-numeric columns (e.g. the string ``label``) are excluded
    # explicitly because pandas >= 2.0 raises on them in ``corr``.
    numeric = mro.select_dtypes(include=['number', 'bool'])
    print(numeric.corr()['label_encoded'].sort_values(ascending=False))
def dataStandard(mro, raw_csv="mro_readyprocess_raw.csv",
                 scaled_csv="mro_readyprocess.csv"):
    """Standardise the model features of ``mro`` and save both versions.

    A subset of ``mro`` (identifier/label columns plus eight feature
    columns) is written to ``raw_csv`` as it is. The feature columns are
    then standardised with scikit-learn's ``StandardScaler`` and the
    result, with the identifier/label columns left untouched, is written
    to ``scaled_csv``.

    Previously both backups were written to ``mro_readyprocess.csv``, so
    the pre-standardisation backup was always overwritten. The
    standardised data keeps that file name; the raw backup now gets its
    own file.

    Args:
        mro: DataFrame containing ``seci``, ``label``, ``label_encoded``
            and the eight feature columns listed in the function body.
        raw_csv: Output path for the backup taken before standardisation.
        scaled_csv: Output path for the standardised data set.

    Returns:
        None.
    """
    id_cols = ['seci', 'label', 'label_encoded']
    feature_cols = ['srsrp', 'lteScPHR', 'lteScSinrUL', 'max_nrsrp',
                    'min_nrsrp', 'diff_rsrp', 'same_mod3_cnt',
                    'same_band_cnt']

    # Backup 1: the selected columns before standardisation.
    mro_partition = mro[id_cols + feature_cols]
    mro_partition.to_csv(raw_csv, index=False, sep=',')

    # Standardise only the feature columns.
    features = StandardScaler().fit_transform(mro_partition[feature_cols])

    # Wrap the ndarray in a DataFrame that reuses the source index, so the
    # scaled values stay on their own rows even when ``mro`` does not have
    # a default 0..n-1 index.
    scaled = pd.DataFrame(features, columns=feature_cols,
                          index=mro_partition.index)

    # Re-attach the untouched identifier/label columns in the original
    # column order.
    mro_partition3 = pd.concat([mro_partition[id_cols], scaled], axis=1)

    # Backup 2: the data set after standardisation.
    mro_partition3.to_csv(scaled_csv, index=False, sep=',')
if __name__ == '__main__':
    print('正在读取文件请稍后:...')
    print('--------------------------------------------------------------------------------')

    # Load and clean the data set.
    mro = handleData()

    # Run the remaining stages in order on the same frame: distribution
    # plots, seaborn correlation heat map, feature construction, and
    # finally standardisation.
    for stage in (drawMRO, seabornMRO, featuresMRO, dataStandard):
        stage(mro)
Figure_1.png
Figure_2.png
Figure_3.png
4.png

Comment list( 0 )

You need to sign in to post a comment

Help Search