# [web-page residue, not part of the program] "Code pull complete; the page will refresh automatically."
# Location of the ratings CSV read by the loader at the bottom of this file.
# The path is relative, so it is resolved against the current working
# directory of the process, not against this file's location.
MOVIE_RATING_PATH = "../../data/movie_rating_20190219_1.csv"
# ----------------------------- User-based collaborative filtering -----------------------------
def eu_distance(user1, user2):
    """Return a size-normalised Euclidean distance between two users.

    Only items present in both mappings contribute to the result.

    Args:
        user1: Mapping of item key -> numeric rating for the first user.
        user2: Mapping of item key -> numeric rating for the second user.

    Returns:
        ``sqrt(sum of squared rating differences) / (shared_count + 0.001)``.
        The ``0.001`` term keeps the division defined when the users share
        no items.

    NOTE: with no shared items the result is ``0.0``, i.e. the smallest
    possible distance. Callers that rank by this value should be aware that
    non-overlapping users therefore look maximally close.
    """
    shared = [key for key in user1 if key in user2]
    squared_sum = sum((user1[key] - user2[key]) ** 2 for key in shared)
    return (squared_sum ** 0.5) / (len(shared) + 0.001)
def cos_distance(user1, user2):
    """Return the cosine similarity of two users over their shared items.

    Despite the name, the value is a similarity: larger means the two rating
    vectors point in a more similar direction. Both the dot product and the
    vector norms are accumulated only over items present in both mappings.

    Args:
        user1: Mapping of item key -> numeric rating for the first user.
        user2: Mapping of item key -> numeric rating for the second user.

    Returns:
        ``dot / (|u1| * |u2| + 0.001)``. The ``0.001`` term avoids division
        by zero; with no shared items the result is ``0.0``.
    """
    dot = 0
    norm1_sq = 0
    norm2_sq = 0
    for key, rating1 in user1.items():
        if key not in user2:
            continue
        rating2 = user2[key]
        dot += rating1 * rating2
        norm1_sq += rating1 * rating1
        norm2_sq += rating2 * rating2
    return dot / ((norm1_sq ** 0.5) * (norm2_sq ** 0.5) + 0.001)
def top_similar(data, user, num=10):
    """Return the ``num`` users most similar to ``user``.

    Args:
        data: Mapping of user id -> {item key -> rating}.
        user: Id of the reference user; must be a key of ``data``.
        num: Maximum number of neighbours to return.

    Returns:
        A list of ``(user_id, similarity)`` tuples, most similar first,
        containing at most ``num`` entries. ``user`` itself is excluded.
    """
    target = data[user] if data else None
    scored = [
        (userid, cos_distance(target, ratings))
        for userid, ratings in data.items()
        if userid != user
    ]
    # cos_distance yields a similarity (higher = closer), so the best
    # neighbours are at the top of a DESCENDING sort. An ascending sort is
    # only appropriate for a true distance such as eu_distance.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    nearest = scored[:num]
    print(nearest)
    return nearest
def recommend(data, user, user_num=10, rec_num=10):
    """Recommend items to ``user`` based on the ratings of similar users.

    Args:
        data: Mapping of user id -> {item key -> rating}.
        user: Id of the user to recommend for; must be a key of ``data``.
        user_num: Number of neighbour users to consult.
        rec_num: Maximum number of recommendations to return.

    Returns:
        A list of ``(item, score)`` tuples sorted by score, highest first,
        excluding every item ``user`` has already rated. The list may be
        shorter than ``rec_num`` when the neighbours have not rated enough
        unseen items; a notice is printed in that case.
    """
    # Find the closest users.
    neighbours = top_similar(data, user, num=user_num)

    # Group every neighbour rating by item as (similarity, rating) pairs.
    ratings_by_item = {}
    for neighbour_id, similarity in neighbours:
        for item, rating in data[neighbour_id].items():
            ratings_by_item.setdefault(item, []).append((similarity, rating))

    # Score each item with the plain mean of the neighbours' ratings.
    # The similarity is carried along but is not used as a weight here.
    scored = [
        (item, sum(rating for _, rating in pairs) / len(pairs))
        for item, pairs in ratings_by_item.items()
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Drop items the user has already rated and stop once enough are found.
    already_rated = data[user]
    res = []
    for entry in scored:
        if entry[0] in already_rated:
            continue
        res.append(entry)
        if len(res) >= rec_num:
            break

    if len(res) < rec_num:
        print("合适item不足目标数量")
    # The slice is a no-op for a short list and keeps the result empty for
    # rec_num <= 0, where the loop above still collects one entry.
    return res[:rec_num]
# ----------------------------- User-based collaborative filtering -----------------------------
# Load the ratings CSV into a nested dict of the form
# user id -> {item key -> rating}.
# Column 0 is used as the user id, column 1 as the rating and column 3 as the
# item key. NOTE(review): fields are split on bare commas, so this assumes no
# field contains a quoted comma -- confirm against the data file.
data = {}
with open(MOVIE_RATING_PATH, 'r', encoding='UTF-8') as f:
    next(f, None)  # skip the header row
    for line in f:
        line = line.strip()
        if not line:
            # A blank line (e.g. at the end of the file) carries no rating.
            continue
        ll = line.split(",")
        if ll[0] not in data:
            data[ll[0]] = {}
        data[ll[0]][ll[3]] = float(ll[1])
print("data imported: %s" % len(data))
print(recommend(data, "1"))
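# [web-page residue, not part of the program] "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# [web-page residue, not part of the program] "If you confirm the content involves no inappropriate language, pure advertising or traffic diversion, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or content violating applicable national laws and regulations, you may click Submit to appeal and we will handle it as soon as possible."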