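# NOTE: stray notice from the code-hosting web page, not part of the program: "Code pull complete; the page will refresh automatically."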
# coding: utf-8
import numpy as np
import pandas as pd
from ftrlp import *
from sklearn.metrics import log_loss as log_loss_sk
from sklearn.model_selection import train_test_split
# Locations of the feature-engineered dataset and of the two splits derived from it.
fe_gbdt_path = "data/FE_gbdt_data.csv"
fe_train_path = "data/FE_train_data.csv"
fe_test_path = "data/FE_test_data.csv"

# Split the processed data into a training set and a test set.
all_data = pd.read_csv(fe_gbdt_path)
# Every column except the "click" label is treated as model input.
X_col = [name for name in all_data.columns if name != "click"]
X = all_data[X_col]
y = all_data["click"]
# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

# Re-attach the label column to each split and write both out as CSV without the index.
fe_train_data = pd.concat([X_train, y_train], axis=1)
fe_train_data.to_csv(fe_train_path, index=False)
fe_test_data = pd.concat([X_val, y_val], axis=1)
fe_test_data.to_csv(fe_test_path, index=False)
def get_features_list():
    """Return the names of the model's input feature columns.

    Reads the header of the CSV at ``fe_test_path`` and returns every column
    name except ``'id'`` and ``'click'``, in file order.
    """
    # nrows=0 parses only the header row, so the whole file is not loaded
    # just to discover its column names.
    header = pd.read_csv(fe_test_path, nrows=0)
    return [name for name in header.columns if name not in ('id', 'click')]
# Feature columns are every column of the test CSV except 'id' and 'click'.
features = get_features_list()
max_features = len(features)
# Parenthesized so this line is valid on Python 3 too, matching the other
# print calls in this script (the output is unchanged on Python 2).
print("features num = %s" % max_features)

# Column roles handed to DataGen: the label, the identifier column, and the
# features, all of which are declared numerical (no categorical columns).
target = "click"
descriptive = ["id"]
categorical = []
numerical = features
# NOTE(review): max_features is set to twice the feature count; the reason is
# not visible in this file -- confirm against ftrlp.DataGen.
data_gen = DataGen(
    max_features=max_features * 2,
    target=target,
    descriptive=descriptive,
    categorical=categorical,
    numerical=numerical,
)

# Train on the training split; predict and score on the held-out split.
data_path = fe_train_path
test_path = fe_test_path
test_data = fe_test_data
# True labels of the held-out split, taken from the in-memory frame.
y = test_data[target]

# Hyperparameters forwarded to FTRLP. Their exact semantics are defined by the
# ftrlp module, which is not visible here.
alpha = 1
beta = 1
l1 = 1
l2 = 0.5
subsample = 0.8
rate = 30000000
epoch = 50
print("alpha=%s, beta=%s, l1=%s, l2=%s, subsample=%s, rate=%s, epoch=%s" % (alpha, beta, l1, l2, subsample, rate, epoch))

ftrlp = FTRLP(alpha=alpha, beta=beta, l1=l1, l2=l2, subsample=subsample, epochs=epoch, rate=rate)
ftrlp.partial_fit(data_gen, data_path)

# Score the predictions for the test CSV against the held-out labels.
y_pred = ftrlp.predict(data_gen, test_path)
logloss = log_loss_sk(y, y_pred)
print("test set log loss = %s" % (logloss))
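# NOTE: stray moderation notice from the code-hosting web page, not part of the program:
# "The content here may be unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# "If you confirm the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you can click submit to appeal and we will handle it as soon as possible."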