1 Star 1 Fork 1

玛卡巴卡/ijcai-18-top2-single-mole-solution

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
full_feature.py 2.02 KB
一键复制 编辑 原始数据 按行查看 历史
yaozzhou(周耀) 提交于 2018-06-06 18:05 . add code
# coding=utf-8
# @author:bryan
"""
使用全量数据提取特征,点击数,交叉点击数,占比
"""
import pandas as pd
def _group_count(data, keys, out_name):
    """Return one row per distinct `keys` combination with its non-null `user_id` count.

    The count is stored in a column called `out_name`; the key columns are
    kept as regular columns so the result can be merged back on `keys`.
    """
    # SeriesGroupBy.agg({new_name: func}) was removed in pandas 1.0, so the
    # count is computed directly and named via reset_index(name=...).
    return data.groupby(keys)['user_id'].count().reset_index(name=out_name)


def full_count_feature(org, name):
    """Build count, pairwise-count and ratio features for the day-7 rows and save them.

    The rows of `org` with ``day == 7`` become the output rows. Counts are
    computed over a reference subset of `org` selected by `name` and then
    left-merged onto those rows, so key values absent from the reference
    subset produce NaN.

    Args:
        org: DataFrame that contains the columns ``instance_id``, ``day`` and
            every column listed in ``col`` below.
        name: Reference subset selector, also used as the column-name prefix
            and in the output file name:
            ``'day6'`` (day == 6), ``'days7'`` (day < 7),
            ``'day7'`` (day == 7) or ``'full'`` (all rows).

    Raises:
        ValueError: If `name` is not one of the four selectors above.

    Side effects:
        Prints each processed column / column pair and writes
        ``../data/<name>_count_feature.csv`` (relative to the current working
        directory) containing ``instance_id`` plus the generated features.
    """
    col = ['user_id', 'item_id', 'item_brand_id', 'shop_id', 'item_category_list', 'item_city_id', 'cate', 'top10',
           'predict_category_property', 'context_page_id', 'query1', 'query']

    # Select the rows the counts are computed over.
    if name == 'day6':
        data = org[org.day == 6][col]
    elif name == 'days7':
        data = org[org.day < 7][col]
    elif name == 'day7':
        data = org[org.day == 7][col]
    elif name == 'full':
        data = org[col]
    else:
        # Previously this fell through and failed later with an unbound `data`.
        raise ValueError(
            "name must be one of 'day6', 'days7', 'day7', 'full'; got %r" % (name,))

    train = org[org.day == 7][['instance_id'] + col]

    # Single-column counts: <name>_<column>_cnt.
    for item in col:
        counts = _group_count(data, [item], '_'.join([name, item, 'cnt']))
        train = pd.merge(train, counts, on=item, how='left')
        print(item)

    # Pairwise counts for every unordered column pair, in `col` order:
    # <name>_<first>_<second>_cnt.
    for pos, first in enumerate(col):
        for second in col[pos + 1:]:
            egg = [first, second]
            counts = _group_count(data, egg, '_'.join([name, first, second, 'cnt']))
            train = pd.merge(train, counts, on=egg, how='left')
            print(egg)

    # Ratio features: pair count divided by the count of the pair's SECOND column.
    # Each pair must appear in `col` order so that its pairwise count column exists.
    # (The original list repeated ['item_brand_id', 'shop_id']; the duplicate only
    # recomputed the same column and has been dropped.)
    cross = [['user_id', 'query'], ['user_id', 'query1'], ['user_id', 'shop_id'], ['user_id', 'item_id'],
             ['item_id', 'shop_id'], ['item_id', 'item_brand_id'],
             ['item_brand_id', 'shop_id'], ['item_id', 'item_category_list'], ['item_id', 'query'],
             ['item_id', 'item_city_id'], ['item_id', 'cate'], ['item_id', 'top10'],
             ['item_id', 'context_page_id'], ['item_id', 'query1'],
             ['shop_id', 'item_city_id'], ['shop_id', 'context_page_id'],
             ]
    for pair in cross:
        first, second = pair
        # NOTE: the ratio column name carries no `name` prefix, so outputs built
        # with different `name` values share these column names.
        train['_'.join(pair + ['cross'])] = (
            train['_'.join([name, first, second, 'cnt'])] / train['_'.join([name, second, 'cnt'])]
        )
        print(pair)

    # Keep only instance_id and the generated features.
    train = train.drop(col, axis=1)
    train.to_csv('../data/' + name + '_count_feature.csv', index=False)
if __name__ == '__main__':
    # Read the input CSV once, then write one feature file per reference window.
    org = pd.read_csv('../data/origion_concat.csv')
    for window in ('day6', 'days7', 'full'):
        full_count_feature(org, window)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/shellyan/ijcai-18-top2-single-mole-solution.git
git@gitee.com:shellyan/ijcai-18-top2-single-mole-solution.git
shellyan
ijcai-18-top2-single-mole-solution
ijcai-18-top2-single-mole-solution
master

搜索帮助