/// <summary>
        /// Scores two users by the ratio of the number of item IDs they have in common to the
        /// number of distinct item IDs across both users.
        /// </summary>
        /// <param name="userID1">ID of the first user</param>
        /// <param name="userID2">ID of the second user</param>
        /// <returns>
        /// <c>Double.NaN</c> when both users have no items or when they share no items,
        /// <c>0.0</c> when exactly one user has no items, otherwise intersection size / union size.
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model       = getDataModel();
            FastIDSet  firstItems  = model.GetItemIDsFromUser(userID1);
            FastIDSet  secondItems = model.GetItemIDsFromUser(userID2);

            int firstCount  = firstItems.Count();
            int secondCount = secondItems.Count();

            if (firstCount == 0 || secondCount == 0)
            {
                // Both empty: undefined. Exactly one empty: no similarity.
                return (firstCount == 0 && secondCount == 0) ? Double.NaN : 0.0;
            }

            // Always pass the smaller set as the argument and call on the larger one.
            int sharedCount;
            if (firstCount < secondCount)
            {
                sharedCount = secondItems.IntersectionSize(firstItems);
            }
            else
            {
                sharedCount = firstItems.IntersectionSize(secondItems);
            }

            if (sharedCount == 0)
            {
                return Double.NaN;
            }

            int combinedCount = firstCount + secondCount - sharedCount;
            return (double)sharedCount / combinedCount;
        }
        /// <summary>
        /// Looks up the item IDs of both users, measures the two set sizes and the size of
        /// their intersection, and hands those three numbers to <c>doSimilarity</c>.
        /// </summary>
        /// <param name="userID1">ID of the first user</param>
        /// <param name="userID2">ID of the second user</param>
        /// <returns>whatever <c>doSimilarity</c> returns for the three sizes</returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model         = getDataModel();
            FastIDSet  itemsOfFirst  = model.GetItemIDsFromUser(userID1);
            FastIDSet  itemsOfSecond = model.GetItemIDsFromUser(userID2);
            int        countOfFirst  = itemsOfFirst.Count();
            int        countOfSecond = itemsOfSecond.Count();

            // Call IntersectionSize on the larger set, passing the smaller one.
            int overlap;
            if (countOfFirst < countOfSecond)
            {
                overlap = itemsOfSecond.IntersectionSize(itemsOfFirst);
            }
            else
            {
                overlap = itemsOfFirst.IntersectionSize(itemsOfSecond);
            }

            return doSimilarity(countOfFirst, countOfSecond, overlap);
        }
Пример #3
0
        /// <summary>
        /// Gathers the item IDs of every user in the neighborhood and then removes the item IDs
        /// that belong to the given user.
        /// </summary>
        /// <param name="theNeighborhood">IDs of the neighboring users</param>
        /// <param name="theUserID">ID of the user whose own items are excluded</param>
        /// <returns>a new set holding the neighbors' items minus the user's own items</returns>
        protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID)
        {
            IDataModel model      = GetDataModel();
            FastIDSet  candidates = new FastIDSet();

            for (int n = 0; n < theNeighborhood.Length; n++)
            {
                FastIDSet neighborItems = model.GetItemIDsFromUser(theNeighborhood[n]);
                candidates.AddAll(neighborItems);
            }

            FastIDSet ownItems = model.GetItemIDsFromUser(theUserID);
            candidates.RemoveAll(ownItems);
            return candidates;
        }
 /// <summary>
 /// Builds the candidate set from every item of every user who has a preference for one of
 /// the preferred items, then removes the preferred items themselves.
 /// </summary>
 /// <param name="preferredItemIDs">item IDs that seed the search and are excluded from the result</param>
 /// <param name="dataModel">model queried for item preferences and user item IDs</param>
 /// <returns>a new set of candidate item IDs</returns>
 protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
   FastIDSet candidates = new FastIDSet();
   for (int p = 0; p < preferredItemIDs.Length; p++) {
     IPreferenceArray prefsForItem = dataModel.GetPreferencesForItem(preferredItemIDs[p]);
     int userCount = prefsForItem.Length();
     int u = 0;
     while (u < userCount) {
       long userID = prefsForItem.GetUserID(u);
       candidates.AddAll(dataModel.GetItemIDsFromUser(userID));
       u++;
     }
   }
   candidates.RemoveAll(preferredItemIDs);
   return candidates;
 }
        /// <summary>
        /// Exports the user IDs in the data model together with each user's item IDs.
        /// </summary>
        /// <param name="dataModel">model to read user IDs and their item IDs from</param>
        /// <returns>
        /// a <c>FastByIDMap</c> mapping each user ID to the <c>FastIDSet</c> returned by
        /// <c>GetItemIDsFromUser</c> for that user
        /// </returns>
        public static FastByIDMap <FastIDSet> toDataMap(IDataModel dataModel)
        {
            // Size the map using the model's user count.
            var result = new FastByIDMap <FastIDSet>(dataModel.GetNumUsers());

            for (var userIDs = dataModel.GetUserIDs(); userIDs.MoveNext();)
            {
                long id = userIDs.Current;
                FastIDSet itemsOfUser = dataModel.GetItemIDsFromUser(id);
                result.Put(id, itemsOfUser);
            }

            return result;
        }
Пример #6
0
 /// <summary>
 /// Returns the item IDs for a user. Requests for <c>TEMP_USER_ID</c> are answered from
 /// <c>prefItemIDs</c>; every other user is forwarded to the wrapped delegate.
 /// </summary>
 /// <param name="userID">ID of the user to look up</param>
 /// <returns>the item IDs for the user</returns>
 /// <exception cref="NoSuchUserException">
 /// when <c>TEMP_USER_ID</c> is requested while <c>tempPrefs</c> is null
 /// </exception>
 public virtual FastIDSet GetItemIDsFromUser(long userID)
 {
     // Ordinary users are served by the delegate.
     if (userID != TEMP_USER_ID)
     {
         return _delegate.GetItemIDsFromUser(userID);
     }

     if (tempPrefs == null)
     {
         throw new NoSuchUserException(TEMP_USER_ID);
     }

     return prefItemIDs;
 }
Пример #7
0
        /// <summary>
        /// Builds the candidate set by walking the preferred items and, for each one, the users
        /// holding a preference for it, adding items of those users through <c>addSomeOf</c>.
        /// When there are more preferred items than <c>maxItems</c>, the items are visited through
        /// a <c>SamplinglongPrimitiveIterator</c>; when an item has more preferences than
        /// <c>maxUsersPerItem</c>, its preferences are visited through a
        /// <c>FixedSizeSamplingIterator</c>. The preferred items are removed from the result.
        /// </summary>
        /// <param name="preferredItemIDs">item IDs that seed the search and are excluded from the result</param>
        /// <param name="dataModel">model queried for item preferences and user item IDs</param>
        /// <returns>a new set of candidate item IDs</returns>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
        {
            var itemCursor = ((IEnumerable <long>)preferredItemIDs).GetEnumerator();

            if (preferredItemIDs.Length > maxItems)
            {
                // Too many seed items: wrap the enumerator so only a sample is visited.
                double rate = (double)maxItems / preferredItemIDs.Length;
                log.Info("preferredItemIDs.Length {0}, samplingRate {1}", preferredItemIDs.Length, rate);
                itemCursor = new SamplinglongPrimitiveIterator(itemCursor, rate);
            }

            FastIDSet candidates = new FastIDSet();

            while (itemCursor.MoveNext())
            {
                long             seedItemID   = itemCursor.Current;
                IPreferenceArray itemPrefs    = dataModel.GetPreferencesForItem(seedItemID);
                int              numItemPrefs = itemPrefs.Length();

                if (numItemPrefs <= maxUsersPerItem)
                {
                    // Few enough users: visit every one of them in order.
                    for (int u = 0; u < numItemPrefs; u++)
                    {
                        long userID = itemPrefs.GetUserID(u);
                        addSomeOf(candidates, dataModel.GetItemIDsFromUser(userID));
                    }
                }
                else
                {
                    var sampled = new FixedSizeSamplingIterator <IPreference>(maxUsersPerItem, itemPrefs.GetEnumerator());
                    while (sampled.MoveNext())
                    {
                        long userID = sampled.Current.GetUserID();
                        addSomeOf(candidates, dataModel.GetItemIDsFromUser(userID));
                    }
                }
            }

            candidates.RemoveAll(preferredItemIDs);
            return candidates;
        }
Пример #8
0
        /// <summary>
        /// Scores two users from a log-likelihood ratio computed over four counts: items both
        /// users have, items only the second user has, items only the first user has, and the
        /// remainder of the model's item count.
        /// </summary>
        /// <param name="userID1">ID of the first user</param>
        /// <param name="userID2">ID of the second user</param>
        /// <returns>
        /// <c>Double.NaN</c> when the users share no items, otherwise
        /// <c>1.0 - 1.0 / (1.0 + logLikelihood)</c>
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model       = getDataModel();
            FastIDSet  firstItems  = model.GetItemIDsFromUser(userID1);
            FastIDSet  secondItems = model.GetItemIDsFromUser(userID2);

            long firstCount  = firstItems.Count();
            long secondCount = secondItems.Count();

            // Call IntersectionSize on the larger set, passing the smaller one.
            long bothCount;
            if (firstCount < secondCount)
            {
                bothCount = secondItems.IntersectionSize(firstItems);
            }
            else
            {
                bothCount = firstItems.IntersectionSize(secondItems);
            }

            if (bothCount == 0)
            {
                return Double.NaN;
            }

            long totalItems   = model.GetNumItems();
            long onlySecond   = secondCount - bothCount;
            long onlyFirst    = firstCount - bothCount;
            long neitherCount = totalItems - firstCount - secondCount + bothCount;

            double ratio = LogLikelihood.logLikelihoodRatio(bothCount, onlySecond, onlyFirst, neitherCount);

            return 1.0 - 1.0 / (1.0 + ratio);
        }
        /// <summary>
        /// Collects every item of every user who has a preference for at least one of the
        /// preferred items, and then drops the preferred items from that collection.
        /// </summary>
        /// <param name="preferredItemIDs">item IDs that seed the search and are excluded from the result</param>
        /// <param name="dataModel">model queried for item preferences and user item IDs</param>
        /// <returns>a new set of candidate item IDs</returns>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
        {
            FastIDSet result = new FastIDSet();

            foreach (long seedItemID in preferredItemIDs)
            {
                IPreferenceArray prefsOnSeed = dataModel.GetPreferencesForItem(seedItemID);
                int              prefCount   = prefsOnSeed.Length();

                for (int k = 0; k < prefCount; k++)
                {
                    long      coUserID    = prefsOnSeed.GetUserID(k);
                    FastIDSet coUserItems = dataModel.GetItemIDsFromUser(coUserID);
                    result.AddAll(coUserItems);
                }
            }

            result.RemoveAll(preferredItemIDs);
            return result;
        }
  /// <summary>
  /// Produces candidate items from the users who have preferences for the preferred items,
  /// adding each such user's items through <c>addSomeOf</c>. Preferred items are visited
  /// through a <c>SamplinglongPrimitiveIterator</c> when they outnumber <c>maxItems</c>, and
  /// an item's preferences are visited through a <c>FixedSizeSamplingIterator</c> when they
  /// outnumber <c>maxUsersPerItem</c>. The preferred items are removed before returning.
  /// </summary>
  /// <param name="preferredItemIDs">item IDs that seed the search and are excluded from the result</param>
  /// <param name="dataModel">model queried for item preferences and user item IDs</param>
  /// <returns>a new set of candidate item IDs</returns>
  protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
    var seedItems = ((IEnumerable<long>)preferredItemIDs).GetEnumerator();
    if (preferredItemIDs.Length > maxItems) {
      // Wrap the plain enumerator so that only a sample of the seed items is visited.
      double fraction = (double) maxItems / preferredItemIDs.Length;
      log.Info("preferredItemIDs.Length {0}, samplingRate {1}", preferredItemIDs.Length, fraction);
      seedItems = new SamplinglongPrimitiveIterator(seedItems, fraction);
    }

    FastIDSet found = new FastIDSet();
    while (seedItems.MoveNext()) {
      long currentItemID = seedItems.Current;
      IPreferenceArray prefsOnItem = dataModel.GetPreferencesForItem(currentItemID);
      int total = prefsOnItem.Length();

      if (total <= maxUsersPerItem) {
        // Small enough to walk completely, in index order.
        for (int pos = 0; pos < total; pos++) {
          long userID = prefsOnItem.GetUserID(pos);
          addSomeOf(found, dataModel.GetItemIDsFromUser(userID));
        }
      } else {
        var subset = new FixedSizeSamplingIterator<IPreference>(maxUsersPerItem, prefsOnItem.GetEnumerator());
        while (subset.MoveNext()) {
          long userID = subset.Current.GetUserID();
          addSomeOf(found, dataModel.GetItemIDsFromUser(userID));
        }
      }
    }

    found.RemoveAll(preferredItemIDs);
    return found;
  }
  /// <summary>
  /// Exports the user IDs in the data model and the item IDs associated with each of them.
  /// </summary>
  /// <param name="dataModel">model to read user IDs and their item IDs from</param>
  /// <returns>
  /// a <c>FastByIDMap</c> mapping user IDs to the <c>FastIDSet</c> of that user's item IDs
  /// </returns>
  public static FastByIDMap<FastIDSet> toDataMap(IDataModel dataModel) {
    int expectedUsers = dataModel.GetNumUsers();
    var exported = new FastByIDMap<FastIDSet>(expectedUsers);
    for (var cursor = dataModel.GetUserIDs(); cursor.MoveNext();) {
      long currentUserID = cursor.Current;
      exported.Put(currentUserID, dataModel.GetItemIDsFromUser(currentUserID));
    }
    return exported;
  }
Пример #12
0
        /// <summary>
        /// Computes the factorization by alternating, for <c>numIterations</c> rounds, between
        /// re-solving every user's feature column (item features held fixed) and re-solving every
        /// item's feature column (user features held fixed). Each user and each item is solved in
        /// its own task; the tasks of one half-step are awaited before the next half-step starts.
        /// </summary>
        /// <returns>the factorization created from the final user (U) and item (M) features</returns>
        /// <exception cref="AggregateException">
        /// rethrown, after logging a warning, when waiting for the tasks reports a failure
        /// </exception>
        public override Factorization Factorize()
        {
            log.Info("starting to compute the factorization...");
            Features features = new Features(this);

            // feature maps necessary for solving for implicit feedback
            IDictionary <int, double[]> userY = null;
            IDictionary <int, double[]> itemY = null;

            if (usesImplicitFeedback)
            {
                userY = userFeaturesMapping(dataModel.GetUserIDs(), dataModel.GetNumUsers(), features.getU());
                itemY = itemFeaturesMapping(dataModel.GetItemIDs(), dataModel.GetNumItems(), features.getM());
            }

            IList <Task> tasks;

            for (int iteration = 0; iteration < numIterations; iteration++)
            {
                log.Info("iteration {0}", iteration);

                // fix M - compute U
                tasks = new List <Task>();
                var userIDsIterator = dataModel.GetUserIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, itemY) : null;

                    while (userIDsIterator.MoveNext())
                    {
                        // Declared inside the loop body so each task's lambda captures its own copies.
                        long             userID          = userIDsIterator.Current;
                        var              itemIDsFromUser = dataModel.GetItemIDsFromUser(userID).GetEnumerator();
                        IPreferenceArray userPrefs       = dataModel.GetPreferencesFromUser(userID);

                        tasks.Add(Task.Factory.StartNew(() => {
                            List <double[]> featureVectors = new List <double[]>();
                            while (itemIDsFromUser.MoveNext())
                            {
                                long itemID = itemIDsFromUser.Current;
                                featureVectors.Add(features.getItemFeatureColumn(itemIndex(itemID)));
                            }

                            var userFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseUserRatingVector(userPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);

                            features.setFeatureColumnInU(userIndex(userID), userFeatures);
                        }));
                    }
                } finally {
                    try {
                        // Allow one second per user. Computed in 64-bit and clamped so that very
                        // large models cannot overflow into an invalid (negative) timeout.
                        int userTimeoutMillis = (int)System.Math.Min((long)int.MaxValue, 1000L * dataModel.GetNumUsers());
                        Task.WaitAll(tasks.ToArray(), userTimeoutMillis);
                    } catch (AggregateException e) {
                        log.Warn("Error when computing user features", e);
                        // Bare rethrow keeps the original stack trace intact.
                        throw;
                    }
                }

                // fix U - compute M
                tasks = new List <Task>();

                var itemIDsIterator = dataModel.GetItemIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, userY) : null;

                    while (itemIDsIterator.MoveNext())
                    {
                        // Declared inside the loop body so each task's lambda captures its own copies.
                        long             itemID    = itemIDsIterator.Current;
                        IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);

                        tasks.Add(Task.Factory.StartNew(() => {
                            var featureVectors = new List <double[]>();
                            foreach (IPreference pref in itemPrefs)
                            {
                                long userID = pref.GetUserID();
                                featureVectors.Add(features.getUserFeatureColumn(userIndex(userID)));
                            }

                            var itemFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseItemRatingVector(itemPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);

                            features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
                        }));
                    }
                } finally {
                    try {
                        // Allow one second per item, with the same overflow-safe clamping as above.
                        int itemTimeoutMillis = (int)System.Math.Min((long)int.MaxValue, 1000L * dataModel.GetNumItems());
                        Task.WaitAll(tasks.ToArray(), itemTimeoutMillis);
                    } catch (AggregateException e) {
                        log.Warn("Error when computing item features", e);
                        // Bare rethrow keeps the original stack trace intact.
                        throw;
                    }
                }
            }

            log.Info("finished computation of the factorization...");
            return(createFactorization(features.getU(), features.getM()));
        }
Пример #13
0
        /// <summary>
        /// Evaluates a recommender per user: picks that user's relevant items, builds a training
        /// model via <c>dataSplitter.ProcessOtherUser</c>, asks a freshly built recommender for
        /// <paramref name="at"/> recommendations, and accumulates precision, recall, fall-out,
        /// nDCG and reach over all evaluated users.
        /// </summary>
        /// <param name="recommenderBuilder">builds the recommender from each training model</param>
        /// <param name="dataModelBuilder">
        /// builds the training model from the training users; when null a <c>GenericDataModel</c> is used
        /// </param>
        /// <param name="dataModel">model supplying the users, preferences and item count</param>
        /// <param name="rescorer">passed through to <c>Recommend</c></param>
        /// <param name="at">number of recommendations requested per user</param>
        /// <param name="relevanceThreshold">
        /// threshold passed to the data splitter; when NaN, <c>computeThreshold</c> is applied to
        /// the user's preferences instead
        /// </param>
        /// <param name="evaluationPercentage">
        /// a user is skipped whenever <c>random.nextDouble()</c> is greater than or equal to this value
        /// </param>
        /// <returns>
        /// statistics holding the running averages and the ratio of users that received at least
        /// one recommendation to users evaluated (NaN when no user was evaluated)
        /// </returns>
        public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                                     IDataModelBuilder dataModelBuilder,
                                     IDataModel dataModel,
                                     IDRescorer rescorer,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            // Argument precondition checks are currently disabled:
            //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
            //Preconditions.checkArgument(dataModel != null, "dataModel is null");
            //Preconditions.checkArgument(at >= 1, "at must be at least 1");
            //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

            int             numItems  = dataModel.GetNumItems();
            IRunningAverage precision = new FullRunningAverage();
            IRunningAverage recall    = new FullRunningAverage();
            IRunningAverage fallOut   = new FullRunningAverage();
            IRunningAverage nDCG      = new FullRunningAverage();
            int             numUsersRecommendedFor      = 0;
            int             numUsersWithRecommendations = 0;

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;

                if (random.nextDouble() >= evaluationPercentage)
                {
                    // Skipped
                    continue;
                }

                var stopWatch = new System.Diagnostics.Stopwatch();
                stopWatch.Start();

                IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

                // List some most-preferred items that would count as (most) "relevant" results
                double    theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
                FastIDSet relevantItemIDs       = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

                int numRelevantItems = relevantItemIDs.Count();
                if (numRelevantItems <= 0)
                {
                    continue;
                }

                FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
                var it2 = dataModel.GetUserIDs();
                while (it2.MoveNext())
                {
                    dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
                }

                IDataModel trainingModel = dataModelBuilder == null
                    ? new GenericDataModel(trainingUsers)
                    : dataModelBuilder.BuildDataModel(trainingUsers);
                try {
                    trainingModel.GetPreferencesFromUser(userID);
                } catch (NoSuchUserException) {
                    continue; // Oops we excluded all prefs for the user -- just move on
                }

                int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
                if (size < 2 * at)
                {
                    // Really not enough prefs to meaningfully evaluate this user
                    continue;
                }

                IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

                int intersectionSize = 0;
                var recommendedItems = recommender.Recommend(userID, at, rescorer);
                foreach (IRecommendedItem recommendedItem in recommendedItems)
                {
                    if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
                    {
                        intersectionSize++;
                    }
                }

                int numRecommendedItems = recommendedItems.Count;

                // Precision
                if (numRecommendedItems > 0)
                {
                    precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
                }

                // Recall
                recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

                // Fall-out
                if (numRelevantItems < size)
                {
                    fallOut.AddDatum((double)(numRecommendedItems - intersectionSize)
                                     / (double)(numItems - numRelevantItems));
                }

                // nDCG
                // In computing, assume relevant IDs have relevance 1 and others 0
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int i = 0; i < numRecommendedItems; i++)
                {
                    IRecommendedItem item     = recommendedItems[i];
                    double           discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
                    if (relevantItemIDs.Contains(item.GetItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

                    // Ideally results would be ordered with all relevant ones first, so this theoretical
                    // ideal list starts with number of relevant items equal to the total number of relevant items
                    if (i < numRelevantItems)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.AddDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                numUsersRecommendedFor++;
                if (numRecommendedItems > 0)
                {
                    numUsersWithRecommendations++;
                }

                stopWatch.Stop();

                // Indexed placeholders, matching the other log calls in this code base; the
                // empty "{}" form is not a valid .NET composite-format item.
                log.Info("Evaluated with user {0} in {1}ms", userID, stopWatch.ElapsedMilliseconds);
                log.Info("Precision/recall/fall-out/nDCG/reach: {0} / {1} / {2} / {3} / {4}",
                         precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                         (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
            }

            return(new IRStatisticsImpl(
                       precision.GetAverage(),
                       recall.GetAverage(),
                       fallOut.GetAverage(),
                       nDCG.GetAverage(),
                       (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
        }
Пример #14
0
 /// <summary>
 /// Forwards the lookup of a user's item IDs to the wrapped delegate unchanged.
 /// </summary>
 /// <param name="userID">ID of the user to look up</param>
 /// <returns>the set returned by the delegate for that user</returns>
 public override FastIDSet GetItemIDsFromUser(long userID)
 {
     FastIDSet itemIDs = _delegate.GetItemIDsFromUser(userID);
     return itemIDs;
 }