From 30e96225cc2f6ef4dbb5a186ee21bd3afc31fc34 Mon Sep 17 00:00:00 2001 From: JoyceYuen Date: Sat, 3 Jan 2015 21:07:17 +0800 Subject: [PATCH] Update classes.py in knn Hi, I like your code. It's concise and efficient. But when I read the recommenders part, specifically "class UserBasedRecommender(UserRecommender)", I found that the code in the method named estimate_preference cannot guarantee that each neighbor's preference is multiplied by that neighbor's own similarity rather than by another neighbor's. This is the previous code: prefs = prefs[~np.isnan(prefs)] similarities = similarities[~np.isnan(prefs)] prefs_sim = np.sum(prefs[~np.isnan(similarities)] * similarities[~np.isnan(similarities)]) total_similarity = np.sum(similarities) Here is a simple example: >>> import numpy as np >>> p = np.array([np.nan, 3,4,5,np.nan,5,6,np.nan,9,10]) >>> p array([ nan, 3., 4., 5., nan, 5., 6., nan, 9., 10.]) >>> s = np.array([1,np.nan,4,6,np.nan,6,7,8,9,10]) >>> s array([ 1., nan, 4., 6., nan, 6., 7., 8., 9., 10.]) >>> p = p[~np.isnan(p)] >>> p array([ 3., 4., 5., 5., 6., 9., 10.]) >>> s = s[~np.isnan(p)] >>> s array([ 1., nan, 4., 6., nan, 6., 7.]) >>> p[~np.isnan(s)] array([ 3., 5., 5., 9., 10.]) >>> s[~np.isnan(s)] array([ 1., 4., 6., 6., 7.]) >>> p[~np.isnan(s)]*s[~np.isnan(s)] array([ 3., 20., 30., 54., 70.]) This follows the same steps as the code. As you can see, it gets a wrong result: for example, the preference 9 is multiplied by the similarity 6 instead of its own similarity 9, because the two arrays are filtered independently and their elements no longer line up. 
My code is like this: temp_prefs = [~np.isnan(prefs)] temp_similarities = [~np.isnan(similarities)] noNaN_indices = np.logical_and(temp_prefs, temp_similarities) prefs_sim = np.sum(prefs[noNaN_indices[0] == True] * similarities[noNaN_indices[0] == True]) similarities = similarities[~np.isnan(similarities)] total_similarity = np.sum(similarities) With the same example: >>> pp = np.array([np.nan,3,4,5,np.nan,5,6,np.nan,9,10]) >>> pp array([ nan, 3., 4., 5., nan, 5., 6., nan, 9., 10.]) >>> ss = np.array([1,np.nan,4,6,np.nan,6,7,8,9,10]) >>> ss array([ 1., nan, 4., 6., nan, 6., 7., 8., 9., 10.]) >>> tss = [~np.isnan(ss)] >>> tss [array([ True, False, True, True, False, True, True, True, True, True], dtype=bool)] >>> tpp = [~np.isnan(pp)] >>> tpp [array([False, True, True, True, False, True, True, False, True, True], dtype=bool)] >>> nonNaN = np.logical_and(tss,tpp) >>> nonNaN array([[False, False, True, True, False, True, True, False, True, True]], dtype=bool) >>> ss[nonNaN[0] == True] * pp[nonNaN[0] == True] array([ 16., 30., 30., 42., 81., 100.]) As you can see, it gets the right answer. If I have misunderstood, please let me know. Thank you in advance. 
Best Wishes --- scikits/crab/recommenders/knn/classes.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scikits/crab/recommenders/knn/classes.py b/scikits/crab/recommenders/knn/classes.py index bf96af3..7e4e86d 100644 --- a/scikits/crab/recommenders/knn/classes.py +++ b/scikits/crab/recommenders/knn/classes.py @@ -532,12 +532,15 @@ def estimate_preference(self, user_id, item_id, **params): prefs = np.array([self.model.preference_value(to_user_id, item_id) for to_user_id in nearest_neighbors]) - - prefs = prefs[~np.isnan(prefs)] - similarities = similarities[~np.isnan(prefs)] - - prefs_sim = np.sum(prefs[~np.isnan(similarities)] * - similarities[~np.isnan(similarities)]) + + temp_prefs = [~np.isnan(prefs)] + temp_similarities = [~np.isnan(similarities)] + noNaN_indices = np.logical_and(temp_prefs, temp_similarities) + + prefs_sim = np.sum(prefs[noNaN_indices[0] == True] * + similarities[noNaN_indices[0] == True]) + + similarities = similarities[~np.isnan(similarities)] total_similarity = np.sum(similarities) #Throw out the estimate if it was based on no data points,