1 Star 8 Fork 6

机智的叉烧/noteLibrary

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
user_CF.py 3.04 KB
一键复制 编辑 原始数据 按行查看 历史
机智的叉烧 提交于 6年前 . CF
# Location of the ratings CSV that this script loads. The path is relative,
# so it is resolved against the current working directory at run time.
MOVIE_RATING_PATH = "../../data/movie_rating_20190219_1.csv"
# -----------------------------User-based collaborative filtering-------------------------------
def eu_distance(user1, user2):
    """Return the Euclidean distance between two users over co-rated items.

    Only items present in both rating dicts contribute. The raw distance is
    divided by ``(number of shared items + 0.001)`` so that users with many
    shared items are not penalised merely for having more terms in the sum.

    Args:
        user1: Dict mapping item -> numeric rating for the first user.
        user2: Dict mapping item -> numeric rating for the second user.

    Returns:
        The scaled distance (smaller means more alike), or ``float("inf")``
        when the two users share no items: with no evidence at all they must
        not be ranked as if they were identical (distance 0).
    """
    squared_sum = 0
    shared = 0
    for item, rating in user1.items():
        if item in user2:
            squared_sum += (rating - user2[item]) ** 2
            shared += 1
    if shared == 0:
        # No overlap: "unknown" is treated as maximally distant.
        return float("inf")
    # The 0.001 term is kept from the original smoothing of the denominator.
    return (squared_sum ** 0.5) / (shared + 0.001)
def cos_distance(user1, user2):
    """Return the cosine similarity of two users over their co-rated items.

    Despite the name, the result is a *similarity*: larger values mean the
    users are more alike. Both the dot product and the two norms are
    accumulated only over items that appear in both rating dicts.

    Args:
        user1: Dict mapping item -> numeric rating for the first user.
        user2: Dict mapping item -> numeric rating for the second user.

    Returns:
        ``dot / (|user1| * |user2| + 0.001)`` restricted to shared items.
        The 0.001 term keeps the division defined, so users with no shared
        items get 0.0.
    """
    dot = 0
    user1_norm = 0
    user2_norm = 0
    for item, rating1 in user1.items():
        if item in user2:
            rating2 = user2[item]
            dot += rating1 * rating2
            user1_norm += rating1 * rating1
            user2_norm += rating2 * rating2
    return dot / ((user1_norm ** 0.5) * (user2_norm ** 0.5) + 0.001)
def top_similar(data, user, num=10, sim_func=None, descending=True):
    """Return the ``num`` users most similar to ``user``.

    Args:
        data: Dict mapping user id -> {item: rating}.
        user: Id of the user to find neighbours for; excluded from the result.
        num: Maximum number of neighbours to return.
        sim_func: Callable ``(ratings_a, ratings_b) -> score``. Defaults to
            ``cos_distance``.
        descending: Rank the largest score first. This is correct for a
            similarity such as cosine; pass ``False`` when ``sim_func``
            returns a distance (for example ``eu_distance``), where smaller
            means closer.

    Returns:
        A list of at most ``num`` ``(user_id, score)`` tuples, best match
        first.

    Raises:
        KeyError: If ``user`` is missing from ``data`` while other users are
            present.
    """
    if sim_func is None:
        sim_func = cos_distance
    scored = [
        (other_id, sim_func(data[user], ratings))
        for other_id, ratings in data.items()
        if other_id != user
    ]
    # Cosine similarity grows with likeness, so the best neighbours are the
    # highest scores; an ascending sort would return the least similar users.
    scored.sort(key=lambda pair: pair[1], reverse=descending)
    top = scored[:num]
    # Diagnostic output kept from the original implementation.
    print(top)
    return top
def recommend(data, user, user_num=10, rec_num=10):
    """Recommend items to ``user`` from the ratings of their nearest neighbours.

    Steps: pick the ``user_num`` neighbours returned by ``top_similar``,
    collect every item those neighbours rated, score each item by the plain
    mean of the neighbours' ratings, and return the best-scored items that
    ``user`` has not rated yet.

    Args:
        data: Dict mapping user id -> {item: rating}.
        user: Id of the user to recommend for.
        user_num: Number of neighbours to consult.
        rec_num: Maximum number of recommendations to return.

    Returns:
        A list of at most ``rec_num`` ``(item, mean_rating)`` tuples, highest
        mean first. When fewer unseen items exist than requested, a notice is
        printed and the shorter list is returned.
    """
    neighbours = top_similar(data, user, num=user_num)

    # Collect, per item, the (neighbour similarity, neighbour rating) pairs.
    votes_by_item = {}
    for neighbour_id, similarity in neighbours:
        for item, rating in data[neighbour_id].items():
            votes_by_item.setdefault(item, []).append((similarity, rating))

    # Score every candidate with the unweighted mean of its ratings.
    # NOTE(review): the similarity stored with each vote is not used in the
    # score; confirm whether a similarity-weighted mean was intended.
    ranked = [
        (item, sum(rating for _, rating in votes) / len(votes))
        for item, votes in votes_by_item.items()
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    # Skip items the user has already rated; stop once enough are found.
    already_rated = data[user]
    res = []
    for entry in ranked:
        if entry[0] in already_rated:
            continue
        res.append(entry)
        if len(res) >= rec_num:
            break

    if len(res) < rec_num:
        print("合适item不足目标数量")
    return res[:rec_num]
# -----------------------------User-based collaborative filtering-------------------------------
# Load the ratings file into a nested user -> movie -> rating structure.
data = {}  # {user: {item: rating}}
with open(MOVIE_RATING_PATH, 'r', encoding='UTF-8') as f:
    next(f, None)  # skip the header row (a no-op on an empty file)
    for line in f:
        # NOTE(review): a plain comma split assumes no field contains a quoted
        # comma; presumably column 0 is the user, column 1 the rating and
        # column 3 the item -- verify against the CSV header.
        ll = line.strip().split(",")
        data.setdefault(ll[0], {})[ll[3]] = float(ll[1])
print("data imported: %s" % len(data))
print(recommend(data, "1"))
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/chashaozgr/noteLibrary.git
git@gitee.com:chashaozgr/noteLibrary.git
chashaozgr
noteLibrary
noteLibrary
master

搜索帮助