1 Star 1 Fork 0

姜山 / credit-card-intention-prediction

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
preprocess.py 1.36 KB
一键复制 编辑 原始数据 按行查看 历史
姜山 提交于 2021-11-15 00:23 . final push
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
def missing_rate(data):
    """Summarize missing values for each column of a DataFrame.

    Args:
        data: pandas DataFrame to inspect.

    Returns:
        DataFrame indexed by the column names of ``data`` with two columns:
        ``Total`` (number of null cells in the column) and ``Percent``
        (null cells divided by the number of rows, i.e. a fraction rather
        than a value scaled to 100). Each series is sorted in descending
        order before the two are combined.
    """
    # Build the null mask and the per-column counts once instead of
    # recomputing ``data.isnull()`` for every derived statistic.
    null_mask = data.isnull()
    null_counts = null_mask.sum()

    total = null_counts.sort_values(ascending=False)
    # The mask itself contains no nulls, so count() is the row count.
    percent = (null_counts / null_mask.count()).sort_values(ascending=False)

    return pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
def impute_missing_values(data):
    """Fill NaN entries of ``data`` with the mean of its values.

    The values of ``data`` are flattened into a single column before
    imputation, so the result of the imputer is a 2-D array with one column.

    Args:
        data: object exposing ``.values`` (presumably a pandas Series of
            numeric values -- confirm against callers).

    Returns:
        The array produced by ``SimpleImputer.fit_transform`` on the
        single-column view of ``data``.
    """
    # SimpleImputer expects 2-D input, hence the (-1, 1) reshape.
    single_column = data.values.reshape(-1, 1)
    mean_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    return mean_imputer.fit_transform(single_column)
def conv_dict(train):
    """Build an ordinal encoding for every non-int64 feature column.

    For each column whose dtype is not ``int64``, the distinct values are
    ordered by the mean of ``Is_Lead`` within each value (ascending) and each
    value is mapped to its position in that ordering.

    Args:
        train: DataFrame holding the feature columns and an ``Is_Lead``
            column.

    Returns:
        dict mapping column name -> {column value: rank}. Values that
        ``groupby`` discards (NaN keys, by pandas' default) get no entry.
    """
    target = "Is_Lead"
    dic = {}
    for col in train:
        # The target must never be encoded against itself: when Is_Lead is
        # not int64 (e.g. float64) selecting [target, target] yields duplicate
        # columns and the groupby fails.
        if col != target and train[col].dtype != 'int64':
            ranked = (
                train[[col, target]]
                .groupby(col, as_index=False)
                .mean()
                .sort_values(by=target)
            )
            dic[col] = {value: rank for rank, value in enumerate(ranked[col])}
    return dic
def conv_data(train, dic):
    """Apply per-column value mappings to a DataFrame in place.

    Every column whose dtype is not ``int64`` and that has an entry in
    ``dic`` has its values substituted through ``Series.replace``; values
    without a mapping are left as they are by ``replace``.

    Args:
        train: DataFrame to encode. It is modified in place.
        dic: dict mapping column name -> {original value: encoded value},
            in the shape produced by ``conv_dict``.

    Returns:
        The same ``train`` object, after modification.
    """
    for col in train:
        # Only touch columns that actually have a mapping; a non-int64 column
        # missing from ``dic`` (e.g. a float target, or a column whose dtype
        # differs from the frame the mapping was built on) is left unchanged
        # instead of raising KeyError.
        if train[col].dtype != 'int64' and col in dic:
            train[col] = train[col].replace(dic[col])
    return train
if __name__ == '__main__':
    # Load the raw train/test tables from the working directory.
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')

    train = train.drop(['ID'], axis=1)
    # Drop the column, not a row label: without axis=1 pandas looks for an
    # index entry named 'Product' and raises KeyError.
    # NOTE(review): confirm 'Product' is the intended column name in train.csv.
    train = train.drop(['Product'], axis=1)

    # Split the frame into the label vector and the feature matrix.
    y = np.array(train['Is_Lead'])
    # axis must be passed by keyword; the positional form was removed in
    # pandas 2.0.
    x = np.array(train.drop('Is_Lead', axis=1))

    # NOTE(review): test_size=0.8 holds out 80% of the rows and trains on
    # 20% -- confirm this is intended rather than an 80/20 train/test split.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.8, random_state=18)
Python
1
https://gitee.com/buptsg2019/credit-card-intention-prediction.git
git@gitee.com:buptsg2019/credit-card-intention-prediction.git
buptsg2019
credit-card-intention-prediction
credit-card-intention-prediction
master

搜索帮助