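# [Hosting-page banner captured with the file; not part of the script] Code pull complete; the page will refresh automatically.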
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import metrics
from imblearn.over_sampling import SMOTE
import numpy as np
import pandas as pd
from preprocess import conv_dict, conv_data
# ---------------------------------------------------------------------------
# Data loading and preparation
# ---------------------------------------------------------------------------
# Load both CSV files and discard the 'ID' column.
train = pd.read_csv('train.csv').drop(columns=['ID'])
test = pd.read_csv('test.csv').drop(columns=['ID'])

# Derive the conversion dictionary from the training frame and apply it to
# both frames (conv_dict / conv_data come from the local `preprocess` module).
train_dict = conv_dict(train)
train = conv_data(train, train_dict)
test = conv_data(test, train_dict)

# Split BEFORE any resampling so the hold-out rows never influence training.
# The split is done on the DataFrame so that the null-row subset and the
# SMOTE input below can both be derived from the correct side of the split.
train_part, holdout_part = train_test_split(train, test_size=0.2, random_state=18)

# `columns=` is used instead of a positional axis argument: pandas 2.0 made
# every `DataFrame.drop` argument except `labels` keyword-only.
y_train = np.array(train_part['Is_Lead'])
x_train = np.array(train_part.drop(columns='Is_Lead'))
y_test = np.array(holdout_part['Is_Lead'])
x_test = np.array(holdout_part.drop(columns='Is_Lead'))

# Hold-out rows whose 'Credit_Product' is missing, used later for a separate
# AUC. Restricting this subset to the hold-out part keeps that score from
# being measured on rows the model was fitted on.
null_index = holdout_part['Credit_Product'].isnull()
test_null = holdout_part[null_index]
test_null.info()  # info() prints its summary itself and returns None
y_null = np.array(test_null['Is_Lead'])
x_null = test_null.drop(columns='Is_Lead')

# Oversample with SMOTE using only the complete rows of the training part
# (rows containing NaN are dropped before being handed to SMOTE).
train_os = train_part.dropna()
y_os = np.array(train_os['Is_Lead'])
x_os = np.array(train_os.drop(columns='Is_Lead'))
over_sampler = SMOTE(random_state=0)
x_os, y_os = over_sampler.fit_resample(x_os, y_os)

# As in the original script, the resampled set is appended to the full
# training part rather than replacing it.
x_train = np.concatenate((x_train, x_os))
y_train = np.concatenate((y_train, y_os))
print(x_train.shape, y_train.shape)
# Booster configuration passed to `xgb.train`.
#
# * The original dict set both 'eta' (0.01) and 'learning_rate' (0.1). XGBoost
#   documents these as aliases of the same parameter, so only one is kept.
#   NOTE(review): 0.1 is retained because it appears to be the value that took
#   effect when both were given -- confirm the intended step size.
# * 'n_estimators' was removed: it is an argument of the scikit-learn wrapper
#   and is not used by `xgb.train`, where the number of rounds is given by
#   `num_boost_round`.
params = {
    'booster': 'gbtree',
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'learning_rate': 0.1,
    'max_depth': 5,
    'min_child_weight': 6,
    'seed': 0,
    'subsample': 0.85,
    'colsample_bytree': 0.8,
    'gamma': 0.8,
    'reg_alpha': 0.4,
    'reg_lambda': 0.7,
}
# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
xgb_train = xgb.DMatrix(x_train, label=y_train)
xgb_test = xgb.DMatrix(x_test, label=y_test)
# Early stopping monitors the last entry of the watchlist ('test').
watchlist = [(xgb_train, 'train'), (xgb_test, 'test')]
num_round = 5000
bst = xgb.train(
    params,
    xgb_train,
    num_round,
    watchlist,
    early_stopping_rounds=200,
    verbose_eval=50,
)
bst.save_model('test.model')

# `xgb.train` returns the model from the last round, and the native
# `Booster.predict` may use every trained round unless told otherwise.
# Restrict all predictions to the best round found by early stopping.
best_range = (0, bst.best_iteration + 1)

# ---------------------------------------------------------------------------
# Hold-out evaluation
# ---------------------------------------------------------------------------
pred = bst.predict(xgb_test, iteration_range=best_range)
y_pred = (pred >= 0.5) * 1  # threshold the probabilities at 0.5
print('AUC: %.4f' % metrics.roc_auc_score(y_test, pred))
print('ACC: %.4f' % metrics.accuracy_score(y_test, y_pred))
print('Recall: %.4f' % metrics.recall_score(y_test, y_pred))
print('F1-score: %.4f' % metrics.f1_score(y_test, y_pred))
print('Precesion: %.4f' % metrics.precision_score(y_test, y_pred))

# ---------------------------------------------------------------------------
# Predictions for the test file and for the rows with missing Credit_Product
# ---------------------------------------------------------------------------
# The model was trained on plain arrays (no feature names), so the frames are
# converted to arrays too; this avoids a feature-name mismatch between the
# training and prediction matrices.
test_arr = xgb.DMatrix(np.array(test))
preds_test = bst.predict(test_arr, iteration_range=best_range)
pred_null = bst.predict(xgb.DMatrix(np.array(x_null)), iteration_range=best_range)
print('NULL-AUC: %.4f' % metrics.roc_auc_score(y_null, pred_null))
np.savetxt('./xgboost_ans.csv', preds_test, fmt='%.8f', delimiter=',')
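# [Hosting-page notice captured with the file; not part of the script] This section may contain content unsuitable for display, so the page does not show it. You can review and revise it using the relevant editing tools.
# If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything violating applicable national laws and regulations, you may submit an appeal and it will be handled as soon as possible.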