/// <summary>
        /// Scores two users by how close the first preference value of each one is:
        /// 1 / (1 + |value1 - value2|).
        /// </summary>
        /// <param name="userID1">ID of the first user.</param>
        /// <param name="userID2">ID of the second user.</param>
        /// <returns>A value in (0, 1]; 1.0 when both first preference values are equal.</returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model = getDataModel();

            // Only the preference at index 0 of each user is looked at.
            var firstUserValue  = model.GetPreferencesFromUser(userID1).Get(0).GetValue();
            var secondUserValue = model.GetPreferencesFromUser(userID2).Get(0).GetValue();
            var distance        = Math.Abs(firstUserValue - secondUserValue);

            return 1.0 / (1.0 + distance);
        }
        public void ratingVector()
        {
            // Checks that ALSWRFactorizer.ratingVector mirrors the preference values of user 1.
            IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(1);
            double[]         vector    = ALSWRFactorizer.ratingVector(userPrefs);

            Assert.AreEqual(userPrefs.Length(), vector.Length);

            // Only the first three entries are compared value-by-value.
            for (int position = 0; position < 3; position++)
            {
                Assert.AreEqual(userPrefs.Get(position).GetValue(), vector[position], EPSILON);
            }
        }
        /// <summary>Adds up the number of preferences of every user in <c>dataModel</c>.</summary>
        /// <returns>The total preference count across all user IDs the model enumerates.</returns>
        private int countPreferences()
        {
            int total = 0;

            for (var ids = dataModel.GetUserIDs(); ids.MoveNext();)
            {
                total += dataModel.GetPreferencesFromUser(ids.Current).Length();
            }
            return total;
        }
        /// <summary>
        /// For every user known to the recommender's data model, compares the recommender's output with the
        /// preferences stored in <paramref name="model"/> and feeds one score per user into
        /// <paramref name="tracker"/>.
        /// </summary>
        /// <param name="recommender">Recommender under evaluation; its data model supplies the user IDs.</param>
        /// <param name="model">Model providing the item count and the reference preferences.</param>
        /// <param name="samples">Upper bound passed to the helpers and applied to the common-item count.</param>
        /// <param name="tracker">Receives the value returned by <c>scoreCommonSubset</c> for each scored user.</param>
        /// <param name="tag">Label forwarded unchanged to <c>scoreCommonSubset</c>.</param>
        public static void Evaluate(IRecommender recommender,
                                    IDataModel model,
                                    int samples,
                                    IRunningAverage tracker,
                                    String tag)
        {
            printHeader();

            for (var userIDs = recommender.GetDataModel().GetUserIDs(); userIDs.MoveNext();)
            {
                long currentUser = userIDs.Current;
                var  recommended = recommender.Recommend(currentUser, model.GetNumItems());

                // Reference preferences, highest value first (sorted in place).
                IPreferenceArray reference = model.GetPreferencesFromUser(currentUser);
                reference.SortByValueReversed();

                FastIDSet sharedItems     = new FastIDSet();
                long      highestFromRecs = setBits(sharedItems, recommended, samples);
                FastIDSet referenceItems  = new FastIDSet();
                long      highestItemID   = Math.Max(highestFromRecs, setBits(referenceItems, reference, samples));

                int usable = mask(sharedItems, referenceItems, highestItemID);
                usable = Math.Min(usable, samples);

                // Fewer than two usable items: nothing is scored for this user.
                if (usable >= 2)
                {
                    long[] fromRecommender = getCommonItems(sharedItems, recommended, usable);
                    long[] fromReference   = getCommonItems(sharedItems, reference, usable);
                    double score           = scoreCommonSubset(tag, currentUser, samples, usable, fromRecommender, fromReference);
                    tracker.AddDatum(score);
                }
            }
        }
 /// <summary>
 /// Feeds every preference value of every user into the running average kept for its item in
 /// <c>itemAverages</c>, creating the average on first sight of an item.
 /// </summary>
 private void buildAverageDiffs()
 {
     // The whole pass runs under the instance monitor; the earlier buildAveragesLock
     // write-lock calls were commented out in favour of this lock statement.
     lock (this)
     {
         IDataModel model = GetDataModel();
         for (var users = model.GetUserIDs(); users.MoveNext();)
         {
             IPreferenceArray userPrefs = model.GetPreferencesFromUser(users.Current);
             int prefCount = userPrefs.Length();
             for (int p = 0; p < prefCount; p++)
             {
                 long            item        = userPrefs.GetItemID(p);
                 IRunningAverage itemAverage = itemAverages.Get(item);
                 if (itemAverage == null)
                 {
                     // First value seen for this item: register a fresh average.
                     itemAverage = new FullRunningAverage();
                     itemAverages.Put(item, itemAverage);
                 }
                 itemAverage.AddDatum(userPrefs.GetValue(p));
             }
         }
     }
 }
 /// <summary>
 /// Walks all users of the recommender's data model and, per user, scores the recommender's output
 /// against the preferences held in <paramref name="model"/>, adding each score to
 /// <paramref name="tracker"/>.
 /// </summary>
 /// <param name="recommender">Recommender being evaluated; also the source of the user IDs.</param>
 /// <param name="model">Supplies the number of items and the reference preferences.</param>
 /// <param name="samples">Limit handed to the helpers and used to cap the common-item count.</param>
 /// <param name="tracker">Accumulates the per-user result of <c>scoreCommonSubset</c>.</param>
 /// <param name="tag">Passed through to <c>scoreCommonSubset</c>.</param>
 public static void Evaluate(IRecommender recommender,
                             IDataModel model,
                             int samples,
                             IRunningAverage tracker,
                             String tag) {
   printHeader();
   var userIterator = recommender.GetDataModel().GetUserIDs();
   while (userIterator.MoveNext()) {
     long user = userIterator.Current;
     var recommendations = recommender.Recommend(user, model.GetNumItems());
     IPreferenceArray knownPrefs = model.GetPreferencesFromUser(user);
     knownPrefs.SortByValueReversed(); // in-place, highest value first

     FastIDSet recommendedSet = new FastIDSet();
     long topItemID = setBits(recommendedSet, recommendations, samples);
     FastIDSet knownSet = new FastIDSet();
     topItemID = Math.Max(topItemID, setBits(knownSet, knownPrefs, samples));

     int commonCount = Math.Min(mask(recommendedSet, knownSet, topItemID), samples);
     if (commonCount < 2) {
       // Not enough common items to score this user.
       continue;
     }

     long[] recommendedOrder = getCommonItems(recommendedSet, recommendations, commonCount);
     long[] knownOrder = getCommonItems(recommendedSet, knownPrefs, commonCount);
     double result = scoreCommonSubset(tag, user, samples, commonCount, recommendedOrder, knownOrder);
     tracker.AddDatum(result);
   }
 }
Beispiel #7
0
        /// <summary>
        /// Adds the preferences of <paramref name="otherUserID"/> to <paramref name="trainingUsers"/>.
        /// For the user under evaluation the "relevant" items are left out; any other user is added unchanged.
        /// </summary>
        /// <param name="userID">User currently being evaluated.</param>
        /// <param name="relevantItemIDs">Item IDs to withhold from the evaluated user's training data.</param>
        /// <param name="trainingUsers">Map receiving the training preferences, keyed by user ID.</param>
        /// <param name="otherUserID">User whose preferences are being processed.</param>
        /// <param name="dataModel">Source of the preferences.</param>
        public void ProcessOtherUser(long userID,
                                     FastIDSet relevantItemIDs,
                                     FastByIDMap <IPreferenceArray> trainingUsers,
                                     long otherUserID,
                                     IDataModel dataModel)
        {
            IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(otherUserID);

            // Any user other than the evaluated one contributes every preference as-is.
            if (userID != otherUserID)
            {
                trainingUsers.Put(otherUserID, allPrefs);
                return;
            }

            // The evaluated user: keep only preferences for items that are not "relevant".
            List <IPreference> kept = new List <IPreference>(allPrefs.Length());
            foreach (IPreference candidate in allPrefs)
            {
                if (relevantItemIDs.Contains(candidate.GetItemID()))
                {
                    continue;
                }
                kept.Add(candidate);
            }

            // Nothing is stored when every preference was withheld.
            if (kept.Count > 0)
            {
                trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
            }
        }
Beispiel #8
0
        /// <summary>
        /// Scans every preference in <paramref name="dataModel"/> once and records the smallest and
        /// largest preference value in the <c>minPref</c> and <c>maxPref</c> fields.
        /// </summary>
        /// <param name="dataModel">Model passed to the base constructor and scanned for the value range.</param>
        public RandomRecommender(IDataModel dataModel) : base(dataModel)
        {
            // Start from an empty range; stays (+inf, -inf) if the model holds no preferences.
            float lowest  = float.PositiveInfinity;
            float highest = float.NegativeInfinity;

            for (var users = dataModel.GetUserIDs(); users.MoveNext();)
            {
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
                for (int p = 0; p < userPrefs.Length(); p++)
                {
                    float rating = userPrefs.GetValue(p);
                    if (rating < lowest)
                    {
                        lowest = rating;
                    }
                    if (rating > highest)
                    {
                        highest = rating;
                    }
                }
            }

            this.minPref = lowest;
            this.maxPref = highest;
        }
Beispiel #9
0
        public void testPreferenceShufflerWithSyntheticData()
        {
            // Verifies that the shuffler yields every preference of the synthetic model exactly once
            // and with the value stored in the model.
            setUpSyntheticData();

            var shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
            shuffler.shuffle();
            shuffler.stage();

            // user ID -> (item ID -> "already seen" marker)
            var seen = new FastByIDMap <FastByIDMap <bool?> >();

            for (int position = 0; position < shuffler.size(); position++)
            {
                IPreference shuffled = shuffler.get(position);

                float?stored = dataModel.GetPreferenceValue(shuffled.GetUserID(), shuffled.GetItemID());
                Assert.AreEqual(shuffled.GetValue(), stored.Value, 0.0);

                if (!seen.ContainsKey(shuffled.GetUserID()))
                {
                    seen.Put(shuffled.GetUserID(), new FastByIDMap <bool?>());
                }

                // A non-null marker here would mean the shuffler produced a duplicate.
                Assert.IsNull(seen.Get(shuffled.GetUserID()).Get(shuffled.GetItemID()));

                seen.Get(shuffled.GetUserID()).Put(shuffled.GetItemID(), true);
            }

            // Second pass: every preference in the model must have been marked above.
            int visited = 0;
            for (var users = dataModel.GetUserIDs(); users.MoveNext();)
            {
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
                foreach (IPreference expected in userPrefs)
                {
                    Assert.True(seen.Get(expected.GetUserID()).Get(expected.GetItemID()).Value);
                    visited++;
                }
            }
            Assert.AreEqual(visited, shuffler.size());
        }
        /// <summary>
        /// Builds a map from each user ID in the data model to the preference array the model
        /// returns for that user.
        /// </summary>
        /// <param name="dataModel">Model whose users and preferences are exported.</param>
        /// <returns>
        /// A <c>FastByIDMap</c> keyed by user ID whose values are the <see cref="IPreferenceArray"/>
        /// instances obtained from <paramref name="dataModel"/>.
        /// </returns>
        public static FastByIDMap <IPreferenceArray> ToDataMap(IDataModel dataModel)
        {
            // Sized up front with the model's user count.
            var exported = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());

            for (var users = dataModel.GetUserIDs(); users.MoveNext();)
            {
                long user = users.Current;
                exported.Put(user, dataModel.GetPreferencesFromUser(user));
            }
            return exported;
        }
Beispiel #11
0
 /// <summary>
 /// Returns the preferences of a user, serving the temporary user from <c>tempPrefs</c> and
 /// every other user from the wrapped delegate.
 /// </summary>
 /// <param name="userID">ID of the user to look up.</param>
 /// <returns>The temporary preferences for <c>TEMP_USER_ID</c>, otherwise the delegate's answer.</returns>
 /// <exception cref="NoSuchUserException">
 /// Thrown when <paramref name="userID"/> is <c>TEMP_USER_ID</c> but no temporary preferences are set.
 /// </exception>
 public virtual IPreferenceArray GetPreferencesFromUser(long userID)
 {
     // Regular users are handled entirely by the delegate.
     if (userID != TEMP_USER_ID)
     {
         return _delegate.GetPreferencesFromUser(userID);
     }

     if (tempPrefs == null)
     {
         throw new NoSuchUserException(TEMP_USER_ID);
     }
     return tempPrefs;
 }
        /// <summary>Averages the values of all preferences of all users in <c>dataModel</c>.</summary>
        /// <returns>The value reported by the running average after every preference was added.</returns>
        double getAveragePreference()
        {
            IRunningAverage mean = new FullRunningAverage();

            for (var users = dataModel.GetUserIDs(); users.MoveNext();)
            {
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
                foreach (IPreference single in userPrefs)
                {
                    mean.AddDatum(single.GetValue());
                }
            }
            return mean.GetAverage();
        }
 /// <summary>
 /// Collects up to <paramref name="at"/> item IDs of the user's preferences whose value is at least
 /// <paramref name="relevanceThreshold"/>, visiting preferences from highest value to lowest.
 /// </summary>
 /// <param name="userID">User whose preferences are inspected.</param>
 /// <param name="at">Maximum number of item IDs to collect.</param>
 /// <param name="relevanceThreshold">Minimum preference value for an item to be collected.</param>
 /// <param name="dataModel">Source of the preferences.</param>
 /// <returns>The set of collected item IDs (possibly empty).</returns>
 public FastIDSet GetRelevantItemsIDs(long userID,
                                      int at,
                                      double relevanceThreshold,
                                      IDataModel dataModel) {
   IPreferenceArray ranked = dataModel.GetPreferencesFromUser(userID);
   FastIDSet picked = new FastIDSet(at);
   // Sorts the array returned by the model in place.
   ranked.SortByValueReversed();

   int index = 0;
   while (index < ranked.Length() && picked.Count() < at) {
     if (ranked.GetValue(index) >= relevanceThreshold) {
       picked.Add(ranked.GetItemID(index));
     }
     index++;
   }
   return picked;
 }
Beispiel #14
0
        /// <summary>
        /// Picks at most <paramref name="at"/> items the user rated at or above
        /// <paramref name="relevanceThreshold"/>, scanning the preferences in descending value order.
        /// </summary>
        /// <param name="userID">User whose preferences are scanned.</param>
        /// <param name="at">Cap on the number of returned item IDs.</param>
        /// <param name="relevanceThreshold">Lowest preference value that still counts as relevant.</param>
        /// <param name="dataModel">Model the preferences are read from.</param>
        /// <returns>Item IDs judged relevant; empty when no preference reaches the threshold.</returns>
        public FastIDSet GetRelevantItemsIDs(long userID,
                                             int at,
                                             double relevanceThreshold,
                                             IDataModel dataModel)
        {
            IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
            FastIDSet        relevant  = new FastIDSet(at);

            // In-place sort of the model's array, highest value first.
            userPrefs.SortByValueReversed();

            for (int rank = 0; rank < userPrefs.Length() && relevant.Count() < at; rank++)
            {
                bool reachesThreshold = userPrefs.GetValue(rank) >= relevanceThreshold;
                if (reachesThreshold)
                {
                    relevant.Add(userPrefs.GetItemID(rank));
                }
            }
            return relevant;
        }
            /// <summary>
            /// Fills the <c>preferences</c> array with every preference of every user in the model,
            /// in user-iteration order.
            /// </summary>
            /// <param name="dataModel">Model whose preferences are cached.</param>
            private void cachePreferences(IDataModel dataModel)
            {
                // Array is sized from a separate counting pass over the same model.
                preferences = new IPreference[countPreferences(dataModel)];

                int next = 0;
                for (var users = dataModel.GetUserIDs(); users.MoveNext();)
                {
                    IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
                    foreach (IPreference single in userPrefs)
                    {
                        preferences[next] = single;
                        next++;
                    }
                }
            }
Beispiel #16
0
        /// <summary>
        /// Ranks the user's own items (excluding <paramref name="itemID"/>) with a
        /// <c>RecommendedBecauseEstimator</c> and returns the top <paramref name="howMany"/> of them.
        /// </summary>
        /// <param name="userID">User the recommendation was made for.</param>
        /// <param name="itemID">Recommended item; it is removed from the candidate set.</param>
        /// <param name="howMany">Number of items requested; not validated here (the precondition check is disabled).</param>
        /// <returns>The list produced by <c>TopItems.GetTopItems</c>.</returns>
        public List <IRecommendedItem> RecommendedBecause(long userID, long itemID, int howMany)
        {
            IDataModel dataModel = GetDataModel();

            TopItems.IEstimator <long> becauseEstimator = new RecommendedBecauseEstimator(this, userID, itemID);

            // Candidates are all items the user has a preference for ...
            IPreferenceArray userPrefs  = dataModel.GetPreferencesFromUser(userID);
            int              prefCount  = userPrefs.Length();
            FastIDSet        candidates = new FastIDSet(prefCount);

            for (int p = 0; p < prefCount; p++)
            {
                candidates.Add(userPrefs.GetItemID(p));
            }
            // ... except the recommended item itself.
            candidates.Remove(itemID);

            return TopItems.GetTopItems(howMany, candidates.GetEnumerator(), null, becauseEstimator);
        }
 /// <summary>
 /// Determines the smallest and largest preference value present in <paramref name="dataModel"/>
 /// and stores them in the <c>minPref</c> and <c>maxPref</c> fields.
 /// </summary>
 /// <param name="dataModel">Model handed to the base constructor and scanned for its value range.</param>
 public RandomRecommender(IDataModel dataModel) : base(dataModel) {
   float smallest = float.PositiveInfinity;
   float largest = float.NegativeInfinity;

   for (var users = dataModel.GetUserIDs(); users.MoveNext();) {
     IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
     for (int p = 0; p < userPrefs.Length(); p++) {
       float rating = userPrefs.GetValue(p);
       // Both bounds are checked independently for every value.
       smallest = rating < smallest ? rating : smallest;
       largest = rating > largest ? rating : largest;
     }
   }

   this.minPref = smallest;
   this.maxPref = largest;
 }
        /// <summary>
        /// Randomly divides one user's preferences between a training and a test set. Each preference
        /// goes to training when a fresh random draw is below <paramref name="trainingPercentage"/>.
        /// </summary>
        /// <param name="trainingPercentage">Threshold compared against each random draw.</param>
        /// <param name="trainingPrefs">Receives the user's training preferences, if any.</param>
        /// <param name="testPrefs">Receives the user's test preferences, but only when training data exists too.</param>
        /// <param name="userID">User whose preferences are split.</param>
        /// <param name="dataModel">Source of the preferences.</param>
        private void splitOneUsersPrefs(double trainingPercentage,
                                        FastByIDMap <IPreferenceArray> trainingPrefs,
                                        FastByIDMap <IPreferenceArray> testPrefs,
                                        long userID,
                                        IDataModel dataModel)
        {
            // Both buckets are created lazily so that "no entries" stays distinguishable as null.
            List <IPreference> trainingBucket = null;
            List <IPreference> testBucket     = null;
            IPreferenceArray   userPrefs      = dataModel.GetPreferencesFromUser(userID);
            int prefCount = userPrefs.Length();

            for (int p = 0; p < prefCount; p++)
            {
                IPreference copy        = new GenericPreference(userID, userPrefs.GetItemID(p), userPrefs.GetValue(p));
                bool        forTraining = random.nextDouble() < trainingPercentage;
                if (forTraining)
                {
                    if (trainingBucket == null)
                    {
                        trainingBucket = new List <IPreference>(3);
                    }
                    trainingBucket.Add(copy);
                }
                else
                {
                    if (testBucket == null)
                    {
                        testBucket = new List <IPreference>(3);
                    }
                    testBucket.Add(copy);
                }
            }

            // A user without any training preference contributes nothing, not even test data.
            if (trainingBucket == null)
            {
                return;
            }

            trainingPrefs.Put(userID, new GenericUserPreferenceArray(trainingBucket));
            if (testBucket != null)
            {
                testPrefs.Put(userID, new GenericUserPreferenceArray(testBucket));
            }
        }
Beispiel #19
0
 /// <summary>
 /// Adds every preference value in the data model to three aggregates: the per-item averages,
 /// the per-user averages and the overall average preference value.
 /// </summary>
 private void buildAverageDiffs()
 {
     // Runs under the instance monitor; the former buildAveragesLock write-lock calls are disabled.
     lock (this)
     {
         IDataModel model = GetDataModel();
         for (var users = model.GetUserIDs(); users.MoveNext();)
         {
             long             user      = users.Current;
             IPreferenceArray userPrefs = model.GetPreferencesFromUser(user);
             int prefCount = userPrefs.Length();
             for (int p = 0; p < prefCount; p++)
             {
                 long  item   = userPrefs.GetItemID(p);
                 float rating = userPrefs.GetValue(p);
                 addDatumAndCreateIfNeeded(item, rating, itemAverages);
                 addDatumAndCreateIfNeeded(user, rating, userAverages);
                 overallAveragePrefValue.AddDatum(rating);
             }
         }
     }
 }
  /// <summary>
  /// Contributes one user's preferences to the training map. The evaluated user loses the
  /// "relevant" items first; all other users are copied over untouched.
  /// </summary>
  /// <param name="userID">User being evaluated for precision/recall.</param>
  /// <param name="relevantItemIDs">Test item IDs that must not appear in that user's training data.</param>
  /// <param name="trainingUsers">Training map to add to, keyed by user ID.</param>
  /// <param name="otherUserID">User whose preferences are processed in this call.</param>
  /// <param name="dataModel">Model the preferences are read from.</param>
  public void ProcessOtherUser(long userID,
                               FastIDSet relevantItemIDs,
                               FastByIDMap<IPreferenceArray> trainingUsers,
                               long otherUserID,
                               IDataModel dataModel) {
    IPreferenceArray sourcePrefs = dataModel.GetPreferencesFromUser(otherUserID);

    bool isEvaluatedUser = userID == otherUserID;
    if (!isEvaluatedUser) {
      // Someone else: all of their preferences go into training.
      trainingUsers.Put(otherUserID, sourcePrefs);
    } else {
      // The evaluated user: strip the relevant (test) items.
      List<IPreference> remaining = new List<IPreference>(sourcePrefs.Length());
      foreach (IPreference entry in sourcePrefs) {
        bool withheld = relevantItemIDs.Contains(entry.GetItemID());
        if (!withheld) {
          remaining.Add(entry);
        }
      }

      // Skip the user entirely if nothing is left.
      if (remaining.Count > 0) {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(remaining));
      }
    }
  }
 /// <summary>Forwards the lookup to the wrapped delegate unchanged.</summary>
 /// <param name="userID">ID of the user to look up.</param>
 /// <returns>Whatever the delegate returns for <paramref name="userID"/>.</returns>
 public override IPreferenceArray GetPreferencesFromUser(long userID)
 {
     IPreferenceArray delegated = _delegate.GetPreferencesFromUser(userID);
     return delegated;
 }
  /// <summary>
  /// Estimates precision, recall, fall-out, nDCG and reach of a recommender. For each sampled user the
  /// user's most-preferred ("relevant") items are withheld, a training model is built from everything
  /// else, and the recommender's top <paramref name="at"/> results are compared with the withheld items.
  /// </summary>
  /// <param name="recommenderBuilder">Builds the recommender from each per-user training model.</param>
  /// <param name="dataModelBuilder">Builds the training model; when null a <c>GenericDataModel</c> is used.</param>
  /// <param name="dataModel">Full data set to evaluate against.</param>
  /// <param name="rescorer">Rescorer handed to <c>Recommend</c>.</param>
  /// <param name="at">Number of recommendations requested and maximum number of relevant items per user.</param>
  /// <param name="relevanceThreshold">
  /// Minimum preference value for an item to count as relevant; when NaN, a per-user threshold is
  /// obtained from <c>computeThreshold</c>.
  /// </param>
  /// <param name="evaluationPercentage">Each user is evaluated only when a random draw is below this value.</param>
  /// <returns>
  /// The averaged statistics. Reach is users-with-recommendations divided by users-evaluated, which is
  /// NaN (0.0 / 0.0) when no user was evaluated.
  /// </returns>
  public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                               IDataModelBuilder dataModelBuilder,
                               IDataModel dataModel,
                               IDRescorer rescorer,
                               int at,
                               double relevanceThreshold,
                               double evaluationPercentage) {

    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    var it = dataModel.GetUserIDs();
    while (it.MoveNext()) {

      long userID = it.Current;

      if (random.nextDouble() >= evaluationPercentage) {
        // Skipped
        continue;
      }

      var stopWatch = new System.Diagnostics.Stopwatch();
      stopWatch.Start();

      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

      // List some most-preferred items that would count as (most) "relevant" results
      double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
      FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

      int numRelevantItems = relevantItemIDs.Count();
      if (numRelevantItems <= 0) {
        continue;
      }

      // Training data: every other user unchanged, this user minus the relevant items
      FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
      var it2 = dataModel.GetUserIDs();
      while (it2.MoveNext()) {
        dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
      }

      IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
      try {
        trainingModel.GetPreferencesFromUser(userID);
      } catch (NoSuchUserException) {
        continue; // Oops we excluded all prefs for the user -- just move on
      }

      int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
      if (size < 2 * at) {
        // Really not enough prefs to meaningfully evaluate this user
        continue;
      }

      IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

      int intersectionSize = 0;
      var recommendedItems = recommender.Recommend(userID, at, rescorer);
      foreach (IRecommendedItem recommendedItem in recommendedItems) {
        if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
          intersectionSize++;
        }
      }

      int numRecommendedItems = recommendedItems.Count;

      // Precision
      if (numRecommendedItems > 0) {
        precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
      }

      // Recall
      recall.AddDatum((double) intersectionSize / (double) numRelevantItems);

      // Fall-out
      if (numRelevantItems < size) {
        fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                         / (double) (numItems - numRelevantItems));
      }

      // nDCG
      // In computing, assume relevant IDs have relevance 1 and others 0
      double cumulativeGain = 0.0;
      double idealizedGain = 0.0;
      for (int i = 0; i < numRecommendedItems; i++) {
        IRecommendedItem item = recommendedItems[i];
        double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
        if (relevantItemIDs.Contains(item.GetItemID())) {
          cumulativeGain += discount;
        }
        // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

        // Ideally results would be ordered with all relevant ones first, so this theoretical
        // ideal list starts with number of relevant items equal to the total number of relevant items
        if (i < numRelevantItems) {
          idealizedGain += discount;
        }
      }
      if (idealizedGain > 0.0) {
        nDCG.AddDatum(cumulativeGain / idealizedGain);
      }

      // Reach
      numUsersRecommendedFor++;
      if (numRecommendedItems > 0) {
        numUsersWithRecommendations++;
      }

      stopWatch.Stop();

      // Indexed format items ({0}, {1}, ...) are the .NET form; the SLF4J-style "{}" carried over
      // from the Java code is not a valid composite-format item.
      log.Info("Evaluated with user {0} in {1}ms", userID, stopWatch.ElapsedMilliseconds);
      log.Info("Precision/recall/fall-out/nDCG/reach: {0} / {1} / {2} / {3} / {4}",
               precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
               (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
    }

    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }
 /// <summary>
 /// Assigns each preference of one user to either the training or the test set, based on one random
 /// draw per preference compared with <paramref name="trainingPercentage"/>.
 /// </summary>
 /// <param name="trainingPercentage">A draw below this value sends the preference to training.</param>
 /// <param name="trainingPrefs">Map that receives the training preferences for the user.</param>
 /// <param name="testPrefs">Map that receives the test preferences; written only if training data exists.</param>
 /// <param name="userID">User being split.</param>
 /// <param name="dataModel">Model the user's preferences are read from.</param>
 private void splitOneUsersPrefs(double trainingPercentage,
                                 FastByIDMap<IPreferenceArray> trainingPrefs,
                                 FastByIDMap<IPreferenceArray> testPrefs,
                                 long userID,
                                 IDataModel dataModel) {
   List<IPreference> training = null;
   List<IPreference> test = null;
   IPreferenceArray source = dataModel.GetPreferencesFromUser(userID);
   int total = source.Length();

   for (int index = 0; index < total; index++) {
     IPreference fresh = new GenericPreference(userID, source.GetItemID(index), source.GetValue(index));
     bool goesToTraining = random.nextDouble() < trainingPercentage;
     // Each list is allocated the first time something lands in it.
     if (goesToTraining) {
       (training ?? (training = new List<IPreference>(3))).Add(fresh);
     } else {
       (test ?? (test = new List<IPreference>(3))).Add(fresh);
     }
   }

   // Test data is only recorded for users that also have training data.
   if (training != null) {
     trainingPrefs.Put(userID, new GenericUserPreferenceArray(training));
     if (test != null) {
       testPrefs.Put(userID, new GenericUserPreferenceArray(test));
     }
   }
 }
        /// <summary>
        /// Computes a similarity between two users from the preference arrays the data model returns for them.
        /// The two arrays are walked in parallel by item ID, accumulating sums, sums of squares, the sum of
        /// products and the sum of squared differences; those totals are handed to <c>computeResult</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the merge-style walk presumably relies on both arrays being ordered by ascending
        /// item ID - confirm against the data model implementation.
        /// </remarks>
        /// <param name="userID1">ID of the first user (the "X" side).</param>
        /// <param name="userID2">ID of the second user (the "Y" side).</param>
        /// <returns>
        /// <c>Double.NaN</c> when either user has no preferences; otherwise the value of <c>computeResult</c>,
        /// passed through <c>normalizeWeightResult</c> unless it is NaN.
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel       dataModel = getDataModel();
            IPreferenceArray xPrefs    = dataModel.GetPreferencesFromUser(userID1);
            IPreferenceArray yPrefs    = dataModel.GetPreferencesFromUser(userID2);
            int xLength = xPrefs.Length();
            int yLength = yPrefs.Length();

            // Nothing to compare if either side is empty
            if (xLength == 0 || yLength == 0)
            {
                return(Double.NaN);
            }

            // xIndex / yIndex hold the item ID currently under each cursor;
            // xPrefIndex / yPrefIndex are the cursor positions inside the arrays
            long xIndex     = xPrefs.GetItemID(0);
            long yIndex     = yPrefs.GetItemID(0);
            int  xPrefIndex = 0;
            int  yPrefIndex = 0;

            double sumX       = 0.0;
            double sumX2      = 0.0;
            double sumY       = 0.0;
            double sumY2      = 0.0;
            double sumXY      = 0.0;
            double sumXYdiff2 = 0.0;
            int    count      = 0;

            // With an inferrer, items rated by only one user are tallied too (see below)
            bool hasInferrer = inferrer != null;

            while (true)
            {
                // -1: X's current item ID is smaller, 1: Y's is smaller, 0: same item
                int compare = xIndex <yIndex ? -1 : xIndex> yIndex ? 1 : 0;
                if (hasInferrer || compare == 0)
                {
                    double x;
                    double y;
                    if (xIndex == yIndex)
                    {
                        // Both users expressed a preference for the item
                        x = xPrefs.GetValue(xPrefIndex);
                        y = yPrefs.GetValue(yPrefIndex);
                    }
                    else
                    {
                        // Only one user expressed a preference, but infer the other one's preference and tally
                        // as if the other user expressed that preference
                        if (compare < 0)
                        {
                            // X has a value; infer Y's
                            x = xPrefs.GetValue(xPrefIndex);
                            y = inferrer.InferPreference(userID2, xIndex);
                        }
                        else
                        {
                            // compare > 0
                            // Y has a value; infer X's
                            x = inferrer.InferPreference(userID1, yIndex);
                            y = yPrefs.GetValue(yPrefIndex);
                        }
                    }
                    sumXY += x * y;
                    sumX  += x;
                    sumX2 += x * x;
                    sumY  += y;
                    sumY2 += y * y;
                    double diff = x - y;
                    sumXYdiff2 += diff * diff;
                    count++;
                }
                // Advance the X cursor when X's item ID was the smaller one or both matched
                if (compare <= 0)
                {
                    if (++xPrefIndex >= xLength)
                    {
                        if (hasInferrer)
                        {
                            // Must count other Ys; pretend next X is far away
                            if (yIndex == long.MaxValue)
                            {
                                // ... but stop if both are done!
                                break;
                            }
                            // long.MaxValue serves as the "X is exhausted" sentinel
                            xIndex = long.MaxValue;
                        }
                        else
                        {
                            // Without an inferrer only common items count, so one exhausted side ends the walk
                            break;
                        }
                    }
                    else
                    {
                        xIndex = xPrefs.GetItemID(xPrefIndex);
                    }
                }
                // Advance the Y cursor when Y's item ID was the smaller one or both matched
                if (compare >= 0)
                {
                    if (++yPrefIndex >= yLength)
                    {
                        if (hasInferrer)
                        {
                            // Must count other Xs; pretend next Y is far away
                            if (xIndex == long.MaxValue)
                            {
                                // ... but stop if both are done!
                                break;
                            }
                            // long.MaxValue serves as the "Y is exhausted" sentinel
                            yIndex = long.MaxValue;
                        }
                        else
                        {
                            break;
                        }
                    }
                    else
                    {
                        yIndex = yPrefs.GetItemID(yPrefIndex);
                    }
                }
            }

            // "Center" the data. If my math is correct, this'll do it.
            double result;

            if (centerData)
            {
                // count may be 0 here (no tallied items); the divisions then yield NaN rather than throwing
                double meanX = sumX / count;
                double meanY = sumY / count;
                // double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
                double centeredSumXY = sumXY - meanY * sumX;
                // double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
                double centeredSumX2 = sumX2 - meanX * sumX;
                // double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
                double centeredSumY2 = sumY2 - meanY * sumY;
                result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
            }
            else
            {
                result = computeResult(count, sumXY, sumX2, sumY2, sumXYdiff2);
            }

            // A NaN result is returned as-is; anything else is adjusted by normalizeWeightResult
            if (!Double.IsNaN(result))
            {
                result = normalizeWeightResult(result, count, cachedNumItems);
            }
            return(result);
        }
        /// <summary>
        /// Computes a rank-based (Spearman-style) correlation between two users. Preference values of the
        /// items both users have in common are replaced by their rank (1 = lowest value), and the result is
        /// 1 - 6 * sum(d^2) / (n * (n^2 - 1)), where d is the rank difference per common item and n the
        /// number of common items.
        /// </summary>
        /// <param name="userID1">ID of the first user.</param>
        /// <param name="userID2">ID of the second user.</param>
        /// <returns>
        /// The correlation, or <c>Double.NaN</c> when either user has at most one preference or the users
        /// share at most one item.
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IPreferenceArray xPrefs = dataModel.GetPreferencesFromUser(userID1);
            IPreferenceArray yPrefs = dataModel.GetPreferencesFromUser(userID2);
            int xLength             = xPrefs.Length();
            int yLength             = yPrefs.Length();

            if (xLength <= 1 || yLength <= 1)
            {
                return(Double.NaN);
            }

            // Copy prefs since we need to modify pref values to ranks
            xPrefs = xPrefs.Clone();
            yPrefs = yPrefs.Clone();

            // First sort by values from low to high
            xPrefs.SortByValue();
            yPrefs.SortByValue();

            // Assign ranks from low to high
            float nextRank = 1.0f;

            for (int i = 0; i < xLength; i++)
            {
                // ... but only for items that are common to both pref arrays
                if (yPrefs.HasPrefWithItemID(xPrefs.GetItemID(i)))
                {
                    xPrefs.SetValue(i, nextRank);
                    nextRank += 1.0f;
                }
                // Other values are bogus but don't matter
            }
            nextRank = 1.0f;
            for (int i = 0; i < yLength; i++)
            {
                if (xPrefs.HasPrefWithItemID(yPrefs.GetItemID(i)))
                {
                    yPrefs.SetValue(i, nextRank);
                    nextRank += 1.0f;
                }
            }

            // Back to item order so both arrays can be walked in parallel by item ID
            xPrefs.SortByItem();
            yPrefs.SortByItem();

            long xIndex     = xPrefs.GetItemID(0);
            long yIndex     = yPrefs.GetItemID(0);
            int  xPrefIndex = 0;
            int  yPrefIndex = 0;

            double sumXYRankDiff2 = 0.0;
            int    count          = 0;

            while (true)
            {
                // -1: X's current item ID is smaller, 1: Y's is smaller, 0: same item
                int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
                if (compare == 0)
                {
                    double diff = xPrefs.GetValue(xPrefIndex) - yPrefs.GetValue(yPrefIndex);
                    sumXYRankDiff2 += diff * diff;
                    count++;
                }
                if (compare <= 0)
                {
                    if (++xPrefIndex >= xLength)
                    {
                        break;
                    }
                    xIndex = xPrefs.GetItemID(xPrefIndex);
                }
                if (compare >= 0)
                {
                    if (++yPrefIndex >= yLength)
                    {
                        break;
                    }
                    yIndex = yPrefs.GetItemID(yPrefIndex);
                }
            }

            if (count <= 1)
            {
                return(Double.NaN);
            }

            // When ranks are unique, this formula actually gives the Pearson correlation.
            // The denominator is evaluated in double: as an int expression, count * (count * count - 1)
            // silently wraps around once count exceeds 1290 and would corrupt the result.
            double n = count;
            return(1.0 - 6.0 * sumXYRankDiff2 / (n * (n * n - 1.0)));
        }
Beispiel #26
0
        /// <summary>
        /// Computes the factorization by alternating, for <c>numIterations</c> rounds, between
        /// solving the user feature columns (item features fixed) and solving the item feature
        /// columns (user features fixed).
        /// </summary>
        /// <remarks>
        /// Every user (respectively item) is solved in its own task. Each half-iteration waits for
        /// its tasks for at most one second per user (respectively item). The result of that wait
        /// is not inspected, so if the timeout elapses the method carries on even though some
        /// tasks may still be running.
        /// </remarks>
        /// <returns>
        /// The factorization created by <c>createFactorization</c> from the final user and item
        /// feature matrices.
        /// </returns>
        /// <exception cref="AggregateException">
        /// Rethrown (after being logged) when waiting on the solver tasks reports a failure.
        /// </exception>
        public override Factorization Factorize()
        {
            log.Info("starting to compute the factorization...");
            Features features = new Features(this);

            // Feature maps necessary for solving for implicit feedback
            IDictionary <int, double[]> userY = null;
            IDictionary <int, double[]> itemY = null;

            if (usesImplicitFeedback)
            {
                userY = userFeaturesMapping(dataModel.GetUserIDs(), dataModel.GetNumUsers(), features.getU());
                itemY = itemFeaturesMapping(dataModel.GetItemIDs(), dataModel.GetNumItems(), features.getM());
            }

            for (int iteration = 0; iteration < numIterations; iteration++)
            {
                log.Info("iteration {0}", iteration);

                // Fix M - compute U
                var userTasks       = new List <Task>();
                var userIDsIterator = dataModel.GetUserIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, itemY) : null;

                    while (userIDsIterator.MoveNext())
                    {
                        // Declared inside the loop so that each task's closure captures its own copies.
                        long             userID          = userIDsIterator.Current;
                        var              itemIDsFromUser = dataModel.GetItemIDsFromUser(userID).GetEnumerator();
                        IPreferenceArray userPrefs       = dataModel.GetPreferencesFromUser(userID);

                        userTasks.Add(Task.Factory.StartNew(() => {
                            // Collect the current feature column of every item this user has.
                            List <double[]> featureVectors = new List <double[]>();
                            while (itemIDsFromUser.MoveNext())
                            {
                                long itemID = itemIDsFromUser.Current;
                                featureVectors.Add(features.getItemFeatureColumn(itemIndex(itemID)));
                            }

                            var userFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseUserRatingVector(userPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);

                            features.setFeatureColumnInU(userIndex(userID), userFeatures);
                        }));
                    }
                } finally {
                    try {
                        // One second per user. Computed in long so the multiplication cannot
                        // overflow, then clamped to the largest timeout WaitAll accepts.
                        int userTimeoutMillis = (int)Math.Min(int.MaxValue, 1000L * dataModel.GetNumUsers());
                        Task.WaitAll(userTasks.ToArray(), userTimeoutMillis);
                    } catch (AggregateException e) {
                        log.Warn("Error when computing user features", e);
                        throw; // plain rethrow keeps the original stack trace
                    }
                }

                // Fix U - compute M
                var itemTasks       = new List <Task>();
                var itemIDsIterator = dataModel.GetItemIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, userY) : null;

                    while (itemIDsIterator.MoveNext())
                    {
                        // Declared inside the loop so that each task's closure captures its own copies.
                        long             itemID    = itemIDsIterator.Current;
                        IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);

                        itemTasks.Add(Task.Factory.StartNew(() => {
                            // Collect the current feature column of every user who has this item.
                            var featureVectors = new List <double[]>();
                            foreach (IPreference pref in itemPrefs)
                            {
                                long userID = pref.GetUserID();
                                featureVectors.Add(features.getUserFeatureColumn(userIndex(userID)));
                            }

                            var itemFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseItemRatingVector(itemPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);

                            features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
                        }));
                    }
                } finally {
                    try {
                        // One second per item, overflow-safe and clamped as for the user phase.
                        int itemTimeoutMillis = (int)Math.Min(int.MaxValue, 1000L * dataModel.GetNumItems());
                        Task.WaitAll(itemTasks.ToArray(), itemTimeoutMillis);
                    } catch (AggregateException e) {
                        log.Warn("Error when computing item features", e);
                        throw; // plain rethrow keeps the original stack trace
                    }
                }
            }

            log.Info("finished computation of the factorization...");
            return(createFactorization(features.getU(), features.getM()));
        }
        /// <summary>
        /// Builds a map from every user ID in the data model to that user's preferences.
        /// </summary>
        /// <param name="dataModel">the data model whose user IDs and preferences are read</param>
        /// <returns>a <see cref="FastByIDMap"/> holding one <see cref="IPreferenceArray"/> per user ID</returns>
        public static FastByIDMap<IPreferenceArray> ToDataMap(IDataModel dataModel) {
            // The map is constructed with the model's user count.
            int userCount = dataModel.GetNumUsers();
            var prefsByUser = new FastByIDMap<IPreferenceArray>(userCount);

            for (var userIDs = dataModel.GetUserIDs(); userIDs.MoveNext(); ) {
                long currentUser = userIDs.Current;
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(currentUser);
                prefsByUser.Put(currentUser, userPrefs);
            }

            return prefsByUser;
        }
    /// <summary>
    /// Allocates the <c>preferences</c> array for the total preference count of the data model
    /// and fills it with each user's preferences, in user iteration order.
    /// </summary>
    /// <param name="dataModel">the data model whose preferences are copied</param>
    private void cachePreferences(IDataModel dataModel) {
      preferences = new IPreference[countPreferences(dataModel)];

      int next = 0;
      for (var users = dataModel.GetUserIDs(); users.MoveNext(); ) {
        long currentUser = users.Current;
        IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(currentUser);
        foreach (IPreference pref in userPrefs) {
          preferences[next] = pref;
          next++;
        }
      }
    }
 /// <summary>Sums the lengths of the preference arrays of all users in the data model.</summary>
 /// <param name="dataModel">the data model to count preferences in</param>
 /// <returns>the total number of preferences across all users</returns>
 private int countPreferences(IDataModel dataModel) {
   int total = 0;
   for (var users = dataModel.GetUserIDs(); users.MoveNext(); ) {
     IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
     total += userPrefs.Length();
   }
   return total;
 }
Beispiel #30
0
        /// <summary>
        /// Evaluates a recommender with information-retrieval statistics. For each sampled user it
        /// determines that user's relevant items, assembles a training model via the data splitter,
        /// builds a recommender on it, requests <paramref name="at"/> recommendations and folds the
        /// outcome into running averages of precision, recall, fall-out and nDCG, plus a reach ratio.
        /// </summary>
        /// <param name="recommenderBuilder">builds the recommender from each per-user training model</param>
        /// <param name="dataModelBuilder">
        /// builds the training model from the training users; when <c>null</c> a
        /// <c>GenericDataModel</c> is used instead
        /// </param>
        /// <param name="dataModel">the data model supplying users, preferences and the item count</param>
        /// <param name="rescorer">passed through to the recommender's <c>Recommend</c> call</param>
        /// <param name="at">number of recommendations requested per user</param>
        /// <param name="relevanceThreshold">
        /// relevance threshold handed to the data splitter; when NaN, a per-user threshold is
        /// obtained from <c>computeThreshold</c>
        /// </param>
        /// <param name="evaluationPercentage">
        /// a user is evaluated only when <c>random.nextDouble()</c> is below this value
        /// </param>
        /// <returns>
        /// An <c>IRStatisticsImpl</c> holding the averaged precision, recall, fall-out and nDCG, and
        /// the reach (users with at least one recommendation divided by users recommended for).
        /// The reach is NaN when no user was evaluated, because it is then 0.0 / 0.0.
        /// </returns>
        public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                                     IDataModelBuilder dataModelBuilder,
                                     IDataModel dataModel,
                                     IDRescorer rescorer,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
            //Preconditions.checkArgument(dataModel != null, "dataModel is null");
            //Preconditions.checkArgument(at >= 1, "at must be at least 1");
            //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

            int             numItems  = dataModel.GetNumItems();
            IRunningAverage precision = new FullRunningAverage();
            IRunningAverage recall    = new FullRunningAverage();
            IRunningAverage fallOut   = new FullRunningAverage();
            IRunningAverage nDCG      = new FullRunningAverage();
            int             numUsersRecommendedFor      = 0;
            int             numUsersWithRecommendations = 0;

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;

                if (random.nextDouble() >= evaluationPercentage)
                {
                    // Skipped
                    continue;
                }

                var stopWatch = new System.Diagnostics.Stopwatch();
                stopWatch.Start();

                IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

                // List some most-preferred items that would count as (most) "relevant" results
                double    theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
                FastIDSet relevantItemIDs       = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

                int numRelevantItems = relevantItemIDs.Count();
                if (numRelevantItems <= 0)
                {
                    continue;
                }

                // Let the data splitter populate the training data from every user in the model.
                FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
                var it2 = dataModel.GetUserIDs();
                while (it2.MoveNext())
                {
                    dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
                }

                IDataModel trainingModel = dataModelBuilder == null
                    ? new GenericDataModel(trainingUsers)
                    : dataModelBuilder.BuildDataModel(trainingUsers);
                try {
                    trainingModel.GetPreferencesFromUser(userID);
                } catch (NoSuchUserException) {
                    continue; // Oops we excluded all prefs for the user -- just move on
                }

                int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
                if (size < 2 * at)
                {
                    // Really not enough prefs to meaningfully evaluate this user
                    continue;
                }

                IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

                int intersectionSize = 0;
                var recommendedItems = recommender.Recommend(userID, at, rescorer);
                foreach (IRecommendedItem recommendedItem in recommendedItems)
                {
                    if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
                    {
                        intersectionSize++;
                    }
                }

                int numRecommendedItems = recommendedItems.Count;

                // Precision
                if (numRecommendedItems > 0)
                {
                    precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
                }

                // Recall
                recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

                // Fall-out
                if (numRelevantItems < size)
                {
                    fallOut.AddDatum((double)(numRecommendedItems - intersectionSize)
                                     / (double)(numItems - numRelevantItems));
                }

                // nDCG
                // In computing, assume relevant IDs have relevance 1 and others 0
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int i = 0; i < numRecommendedItems; i++)
                {
                    IRecommendedItem item     = recommendedItems[i];
                    double           discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
                    if (relevantItemIDs.Contains(item.GetItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

                    // Ideally results would be ordered with all relevant ones first, so this theoretical
                    // ideal list starts with number of relevant items equal to the total number of relevant items
                    if (i < numRelevantItems)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.AddDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                numUsersRecommendedFor++;
                if (numRecommendedItems > 0)
                {
                    numUsersWithRecommendations++;
                }

                stopWatch.Stop();

                // Indexed placeholders: .NET composite formatting does not accept the bare "{}" form.
                log.Info("Evaluated with user {0} in {1}ms", userID, stopWatch.ElapsedMilliseconds);
                log.Info("Precision/recall/fall-out/nDCG/reach: {0} / {1} / {2} / {3} / {4}",
                         precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                         (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
            }

            // Reach is 0.0 / 0.0 (NaN) when no user made it through the loop.
            return(new IRStatisticsImpl(
                       precision.GetAverage(),
                       recall.GetAverage(),
                       fallOut.GetAverage(),
                       nDCG.GetAverage(),
                       (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
        }