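# [Hosting-page notice captured with the file, not part of the program]
# Code pull complete; the page will refresh automatically.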
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
def missing_rate(data):
    """Summarise the missing values in each column of a DataFrame.

    Args:
        data: pandas DataFrame to inspect.

    Returns:
        DataFrame indexed by the column names of ``data`` with two columns:
        ``Total`` (number of null cells in that column) and ``Percent``
        (null cells divided by the number of rows; this is a fraction, it is
        not scaled to 100). Rows are ordered from most to least missing.
    """
    null_mask = data.isnull()  # computed once and reused below
    null_counts = null_mask.sum()
    total = null_counts.sort_values(ascending=False)
    # count() on the boolean mask never sees a null, so it equals the row count.
    percent = (null_counts / null_mask.count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    return missing_data
def impute_missing_values(data):
    """Replace NaN entries in ``data`` with the mean of its non-missing values.

    Args:
        data: pandas object exposing ``.values`` (the code only relies on
            that attribute and on the resulting array supporting
            ``reshape``).

    Returns:
        The array produced by ``SimpleImputer.fit_transform`` on the input
        reshaped to a single column.
    """
    imp = SimpleImputer(missing_values=np.nan, strategy='mean')
    # reshape(-1, 1) stacks every value into ONE column, so a single mean is
    # computed over all values. NOTE(review): if a multi-column DataFrame is
    # passed, this is a global mean, not a per-column mean - confirm callers
    # only pass a single column.
    data_reshape = data.values.reshape(-1, 1)
    return imp.fit_transform(data_reshape)
def conv_dict(train):
    """Build ordinal encodings for the non-int64 columns of ``train``.

    For every column whose dtype is not ``int64`` (other than the target
    column ``Is_Lead``), the distinct values are ranked by the mean of
    ``Is_Lead`` within each value, lowest mean first, and mapped to
    0, 1, 2, ... in that order.

    Args:
        train: pandas DataFrame that contains an ``Is_Lead`` column.

    Returns:
        dict mapping column name -> {original value: rank}.
    """
    dic = {}
    for col in train:
        # Skip the target explicitly: when Is_Lead is not int64 the dtype
        # test alone would let it through, and selecting
        # [["Is_Lead", "Is_Lead"]] then grouping by it fails.
        if col == "Is_Lead" or train[col].dtype == 'int64':
            continue
        tmp = train[[col, "Is_Lead"]].groupby(col, as_index=False).mean().sort_values(by='Is_Lead')
        # With as_index=False the group keys are the first column of tmp.
        ordered_keys = np.array(tmp)[:, 0]
        dic[col] = {key: rank for rank, key in enumerate(ordered_keys)}
    return dic
def conv_data(train, dic):
    """Apply value mappings to the non-int64 columns of ``train``.

    Args:
        train: pandas DataFrame to encode. It is modified in place.
        dic: dict mapping column name -> {original value: replacement}.

    Returns:
        The same ``train`` object, after replacement.
    """
    for col in train:
        # Columns without a mapping (for example float features) are left
        # untouched instead of raising KeyError on dic[col].
        if train[col].dtype != 'int64' and col in dic:
            train[col] = train[col].replace(dic[col])
    return train
if __name__ == '__main__':
    # Script entry point: load the CSVs, drop unused columns, and split the
    # training data into features / target and train / hold-out parts.
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')
    # Drop by column name. Without columns= (or axis=1) DataFrame.drop looks
    # for a ROW labelled 'Product' and raises KeyError.
    train = train.drop(columns=['ID'])
    train = train.drop(columns=['Product'])
    y = np.array(train['Is_Lead'])
    # The keyword form is required: pandas 2.0 removed the positional axis
    # argument of DataFrame.drop.
    x = np.array(train.drop(columns='Is_Lead'))
    # NOTE(review): test_size=0.8 holds out 80% of the rows and trains on
    # only 20% - confirm this is intended and not a swapped train/test ratio.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.8, random_state=18)
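# [Hosting-page notice captured with the file, not part of the program; the remainder of the script was hidden by the page]
# This section may contain content unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing features.
# If you confirm that the content does not involve inappropriate language / pure advertising or traffic diversion / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you can click Submit to appeal, and we will handle it as soon as possible.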