LFM python 实现

最近参加一个推荐算法比赛,想试一下 LFM(Latent Factor Model,隐语义模型)来提高预测精度,于是自己尝试写了一个 LFM 实现。

数据规模比较大时,性能较差;浮点运算可能出现超出精度/表示范围(溢出)的情况。实际使用时建议还是使用 libFM。


参考公式 :

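损失函数(对所有已观测到的评分 $(u,i)$ 求和):

$$C = \sum_{(u,i)} \left[ \left( r_{ui} - \sum_{f=1}^{F} p_{uf}\, q_{if} \right)^2 + \lambda \lVert p_u \rVert^2 + \lambda \lVert q_i \rVert^2 \right]$$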

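梯度公式(对单个样本 $(u,i)$,记 $e_{ui} = r_{ui} - \sum_{f} p_{uf}\, q_{if}$):

$$\frac{\partial C}{\partial p_{uf}} = -2\, e_{ui}\, q_{if} + 2\lambda\, p_{uf}, \qquad \frac{\partial C}{\partial q_{if}} = -2\, e_{ui}\, p_{uf} + 2\lambda\, q_{if}$$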

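采用随机梯度下降,对每个样本 $(u,i)$ 按下式更新参数(常数 2 并入步长 $\alpha$,$e_{ui}$ 为真实评分与预测值之差):

$$p_{uf} \leftarrow p_{uf} + \alpha\,(e_{ui}\, q_{if} - \lambda\, p_{uf}), \qquad q_{if} \leftarrow q_{if} + \alpha\,(e_{ui}\, p_{uf} - \lambda\, q_{if})$$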

 

 

# coding:utf-8
import pandas as pd
import numpy as np
import math
import operator
import time
import random


def initpara(users, items, F):
    """Create a random latent-factor vector for every user and every item.

    Each vector is a list of ``F`` floats computed as
    ``-1 + 2 * random.random()``. User vectors are generated first (in the
    iteration order of ``users``), then item vectors.

    :param users: iterable of user ids
    :param items: iterable of item ids
    :param F: number of latent factors per vector
    :return: tuple ``(p, q)``; ``p`` maps user id -> vector, ``q`` maps
        item id -> vector
    """
    def random_vector():
        # One fresh draw per factor.
        return [(-1 + 2 * random.random()) for _ in range(0, F)]

    user_factors = {userid: random_vector() for userid in users}
    item_factors = {itemid: random_vector() for itemid in items}
    return user_factors, item_factors


def initsamples(user_items):
    """Build the per-user training samples, skipping zero scores.

    Entries whose score equals 0 are left out of the samples (presumably 0
    marks "no rating" -- confirm against the data source).

    :param user_items: dict mapping user id -> dict of item id -> score
    :return: list of ``(userid, samples)`` tuples, one per user, where
        ``samples`` is a dict of item id -> non-zero score; a user whose
        scores are all 0 still gets an entry with an empty dict
    """
    # The previous version also filled an ``items_pool`` list that was never
    # read; that dead pass over the data has been removed.
    user_samples = []
    for userid, items in user_items.items():
        rated = {itemid: score for itemid, score in items.items() if score != 0}
        user_samples.append((userid, rated))
    return user_samples


def initmodel(user_items, users, items, F):
    """Prepare everything the trainer needs.

    :param user_items: dict mapping user id -> dict of item id -> score
    :param users: iterable of user ids
    :param items: iterable of item ids
    :param F: number of latent factors
    :return: tuple ``(p, q, user_samples)`` -- the factor dicts from
        ``initpara`` followed by the sample list from ``initsamples``
    """
    user_factors, item_factors = initpara(users, items, F)
    return user_factors, item_factors, initsamples(user_items)


def predict(userid, itemid, p, q):
    """Return the dot product of a user's and an item's factor vectors.

    The length of the user's vector decides how many factors are summed.

    :param userid: key into ``p``
    :param itemid: key into ``q``
    :param p: dict mapping user id -> factor list
    :param q: dict mapping item id -> factor list
    :return: sum of ``p[userid][f] * q[itemid][f]`` over the user's factors
    """
    user_vector = p[userid]
    return sum(weight * q[itemid][idx] for idx, weight in enumerate(user_vector))


def lfm(user_items, users, items, F, N, alpha, lamda, decay=0.9, debug_user=1):
    '''
    Train the latent factor model (parameters p, q) by stochastic gradient descent.

    For every epoch the order of users is shuffled, each non-zero sample
    updates the factors of its user and item, the epoch RMSE is printed and
    the step size is multiplied by ``decay``.

    :param user_items: dict mapping user id -> dict of item id -> score
    :param users: iterable of user ids
    :param items: iterable of item ids
    :param F: number of latent factors
    :param N: number of epochs (passes over the samples)
    :param alpha: initial step size (learning rate)
    :param lamda: regularization weight
    :param decay: factor applied to ``alpha`` after each epoch (default 0.9)
    :param debug_user: user id whose first sample rated 1 and first sample
        rated -1 are traced with a print on every visit; ``None`` disables
        the trace (default 1)
    :return: p, q -- dicts mapping user id / item id -> factor list
    '''
    p, q, user_samples = initmodel(user_items, users, items, F)

    # Items being traced for ``debug_user``. ``None`` means "not chosen yet",
    # so an item id of 0 is not mistaken for an unset slot.
    debug_pos = None
    debug_neg = None
    for step in range(0, N):
        random.shuffle(user_samples)  # visit users in a new order each epoch

        error = 0
        count = 0
        for userid, samples in user_samples:
            trace = debug_user is not None and userid == debug_user
            for itemid, rui in samples.items():
                pui = predict(userid, itemid, p, q)
                eui = rui - pui
                count += 1
                error += math.pow(eui, 2)

                if trace:
                    if debug_pos is None and rui == 1:
                        debug_pos = itemid
                    if debug_neg is None and rui == -1:
                        debug_neg = itemid
                    # Single-argument print(...) emits the same text on
                    # Python 2 and Python 3.
                    if itemid == debug_pos:
                        print("%s %s %s %s %s" % (debug_pos, rui, pui, eui, alpha))
                    if itemid == debug_neg:
                        print("%s %s %s %s %s" % (debug_neg, rui, pui, eui, alpha))

                for f in range(0, F):
                    # Read both old values first so the two updates below are
                    # computed from the same pre-update state.
                    p_u = p[userid][f]
                    q_i = q[itemid][f]

                    p[userid][f] += alpha * (eui * q_i - lamda * p_u)
                    q[itemid][f] += alpha * (eui * p_u - lamda * q_i)

        # With no samples at all there is nothing to average; report 0.0
        # instead of raising ZeroDivisionError.
        rmse = math.sqrt(error / count) if count else 0.0
        print("rmse: %s" % rmse)
        alpha *= decay
    return p, q


def predictlist(userid, items, p, q):
    """Predict one user's score for each of the given items.

    :param userid: user id to predict for
    :param items: iterable of item ids
    :param p: dict mapping user id -> factor list
    :param q: dict mapping item id -> factor list
    :return: dict mapping item id -> value returned by ``predict``
    """
    return {itemid: predict(userid, itemid, p, q) for itemid in items}


def recommend():
    """Train on a small hard-coded rating table and print the predictions.

    For every user the known scores ("target") and the model's predicted
    scores ("predicted") are printed. Takes no arguments, returns nothing.
    """
    # All output goes through single-argument print(...), which emits the
    # same text on Python 2 and Python 3; the former print statements were a
    # SyntaxError on Python 3.
    print('start')
    user_items = {1: {'a': 1, 'b': -1, 'c': -1, 'd': -1, 'e': 1, 'f': 1, 'g': -1},
                  2: {'a': -1, 'b': 1, 'c': -1, 'd': 1, 'e': 1, 'f': 1, 'g': 1},
                  3: {'a': 1, 'b': -1, 'c': 0, 'd': -1, 'e': -1, 'f': -1, 'g': 1},
                  4: {'a': 1, 'b': -1, 'c': -1, 'd': 0, 'e': 1, 'f': 1, 'g': 1},
                  5: {'a': -1, 'b': 1, 'c': 1, 'd': 1, 'e': -1, 'f': -1, 'g': 0},
                  6: {'a': 1, 'b': 0, 'c': -1, 'd': -1, 'e': 1, 'f': -1, 'g': -1}}
    users = {1, 2, 3, 4, 5, 6}
    items = {'a', 'b', 'c', 'd', 'e', 'f', 'g'}
    F = 5         # number of latent factors
    N = 30        # number of training epochs
    alpha = 0.3   # initial step size
    lamda = 0.03  # regularization weight
    p, q = lfm(user_items, users, items, F, N, alpha, lamda)

    for userid, itemdic in user_items.items():
        print(userid)
        # The 1-tuple keeps %-formatting from treating the dict as a mapping
        # of named fields.
        print("target %s" % (itemdic,))
        predict_score = predictlist(userid, itemdic, p, q)
        print("predicted %s" % (predict_score,))

    print('end')


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    recommend()

运行结果

 

posted on 2017-06-02 17:32  张日海  阅读(3598)  评论(0)  编辑  收藏  举报

导航