1 Star 1 Fork 0

姜山 / credit-card-intention-prediction

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
xgboost_model.py 2.34 KB
一键复制 编辑 原始数据 按行查看 历史
姜山 提交于 2021-11-15 00:23 . final push
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import metrics
from imblearn.over_sampling import SMOTE
import numpy as np
import pandas as pd
from preprocess import conv_dict, conv_data
# Load the labelled table and the scoring table. The 'ID' column is removed
# immediately so it is never handed to the model as a feature.
train = pd.read_csv('train.csv').drop(columns=['ID'])
test = pd.read_csv('test.csv').drop(columns=['ID'])

# conv_dict / conv_data come from the local `preprocess` module. The mapping
# is built from the training table only and then applied to both tables, so
# the scoring table is converted with exactly the same mapping.
# NOTE(review): presumably this is a categorical encoding -- confirm in
# preprocess.py.
train_dict = conv_dict(train)
train = conv_data(train, train_dict)
test = conv_data(test, train_dict)
# Full feature matrix / label vector of the labelled table (kept under their
# original names).
y = np.array(train['Is_Lead'])
x = np.array(train.drop(columns='Is_Lead'))

# Split whole rows first (same test_size and random_state as before) so that
# every later step can tell fitting rows apart from held-out rows.
fit_frame, holdout_frame = train_test_split(train, test_size=0.2, random_state=18)
x_train = np.array(fit_frame.drop(columns='Is_Lead'))
y_train = np.array(fit_frame['Is_Lead'])
x_test = np.array(holdout_frame.drop(columns='Is_Lead'))
y_test = np.array(holdout_frame['Is_Lead'])

# Rows whose 'Credit_Product' is missing get a separate AUC later on. They are
# taken from the hold-out rows only: scoring rows the model was fitted on
# would give an optimistic number.
null_index = holdout_frame['Credit_Product'].isnull()
test_null = holdout_frame.loc[null_index]
test_null.info()  # info() prints by itself and returns None, so no print()
y_null = np.array(test_null['Is_Lead'])
x_null = test_null.drop(columns='Is_Lead')

# Oversample with SMOTE using fitting rows only. Building the oversampled set
# from the whole table would copy held-out rows (and synthetic points derived
# from them) into the training matrix and inflate every reported metric.
# Rows containing missing values are dropped before resampling.
train_os = fit_frame.dropna()
y_os = np.array(train_os['Is_Lead'])
x_os = np.array(train_os.drop(columns='Is_Lead'))
over_sampler = SMOTE(random_state=0)
x_os, y_os = over_sampler.fit_resample(x_os, y_os)

# The resampled rows are appended to (not substituted for) the fitting split.
# NOTE(review): the resampled set appears to contain the complete fitting rows
# as well as the synthetic ones, so those rows would be present twice --
# confirm this weighting is intended.
x_train = np.concatenate((x_train, x_os))
y_train = np.concatenate((y_train, y_os))
print(x_train.shape, y_train.shape)
# Booster parameters for the native xgb.train() API.
#
# Two entries of the previous dict were removed:
#   * 'n_estimators' belongs to the scikit-learn wrapper; with xgb.train() the
#     number of rounds comes from its num_boost_round argument instead.
#   * 'eta' is an alias of 'learning_rate'. Both were supplied with different
#     values (0.01 and 0.1), which left the step size ambiguous.
#     NOTE(review): 0.1 is kept here; confirm that this is the intended value.
params = {
    'booster': 'gbtree',
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'learning_rate': 0.1,
    'max_depth': 5,
    'min_child_weight': 6,
    'seed': 0,
    'subsample': 0.85,
    'colsample_bytree': 0.8,
    'gamma': 0.8,
    'reg_alpha': 0.4,
    'reg_lambda': 0.7,
}
# Wrap the fitting and held-out arrays in DMatrix containers.
xgb_train = xgb.DMatrix(data=x_train, label=y_train)
xgb_test = xgb.DMatrix(data=x_test, label=y_test)

# Both sets are evaluated during training. The held-out set is listed last;
# per the XGBoost docs the last entry is the one early stopping monitors.
watchlist = [(xgb_train, 'train'), (xgb_test, 'test')]

# Upper bound on boosting rounds; training ends earlier once the monitored
# metric has not improved for 200 consecutive rounds. Progress is logged
# every 50 rounds.
num_round = 5000
bst = xgb.train(
    params,
    xgb_train,
    num_boost_round=num_round,
    evals=watchlist,
    early_stopping_rounds=200,
    verbose_eval=50,
)

# Persist the trained booster to disk.
bst.save_model('test.model')
# Training ran with early stopping, so the booster may hold rounds added after
# the best validation score. Restrict every prediction to the rounds up to and
# including the best one (iteration_range is half-open).
best_rounds = (0, bst.best_iteration + 1)

# Held-out probabilities, thresholded at 0.5 for the label-based metrics.
pred = bst.predict(xgb_test, iteration_range=best_rounds)
y_pred = (pred >= 0.5).astype(int)

# AUC is computed on the probabilities; the remaining metrics on hard labels.
print('AUC: %.4f' % metrics.roc_auc_score(y_test, pred))
for label, scorer in (
    ('ACC', metrics.accuracy_score),
    ('Recall', metrics.recall_score),
    ('F1-score', metrics.f1_score),
    ('Precision', metrics.precision_score),  # label was misspelled 'Precesion'
):
    print('%s: %.4f' % (label, scorer(y_test, y_pred)))

# Probabilities for the unlabelled scoring table.
test_arr = xgb.DMatrix(test)
preds_test = bst.predict(test_arr, iteration_range=best_rounds)

# Separate AUC for the rows with a missing 'Credit_Product'.
pred_null = bst.predict(xgb.DMatrix(x_null), iteration_range=best_rounds)
print('NULL-AUC: %.4f' % metrics.roc_auc_score(y_null, pred_null))

# One probability per line, eight decimal places.
np.savetxt('./xgboost_ans.csv', preds_test, fmt='%.8f', delimiter=',')
Python
1
https://gitee.com/buptsg2019/credit-card-intention-prediction.git
git@gitee.com:buptsg2019/credit-card-intention-prediction.git
buptsg2019
credit-card-intention-prediction
credit-card-intention-prediction
master

搜索帮助

53164aa7 5694891 3bd8fe86 5694891