예제 #1
0
        public void testStrategy()
        {
            List <IPreference> prefsOfUser123 = new List <IPreference>();

            prefsOfUser123.Add(new GenericPreference(123L, 1L, 1.0f));

            List <IPreference> prefsOfUser456 = new List <IPreference>();

            prefsOfUser456.Add(new GenericPreference(456L, 1L, 1.0f));
            prefsOfUser456.Add(new GenericPreference(456L, 2L, 1.0f));

            List <IPreference> prefsOfUser789 = new List <IPreference>();

            prefsOfUser789.Add(new GenericPreference(789L, 1L, 0.5f));
            prefsOfUser789.Add(new GenericPreference(789L, 3L, 1.0f));

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

            FastByIDMap <IPreferenceArray> userData = new FastByIDMap <IPreferenceArray>();

            userData.Put(123L, prefArrayOfUser123);
            userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
            userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));

            IDataModel dataModel = new GenericDataModel(userData);

            ICandidateItemsStrategy strategy =
                new SamplingCandidateItemsStrategy(1, 1, 1, dataModel.GetNumUsers(), dataModel.GetNumItems());

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

            Assert.True(candidateItems.Count() <= 1);
            Assert.False(candidateItems.Contains(1L));
        }
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel dataModel = getDataModel();
            FastIDSet  xPrefs    = dataModel.GetItemIDsFromUser(userID1);
            FastIDSet  yPrefs    = dataModel.GetItemIDsFromUser(userID2);

            int xPrefsSize = xPrefs.Count();
            int yPrefsSize = yPrefs.Count();

            if (xPrefsSize == 0 && yPrefsSize == 0)
            {
                return(Double.NaN);
            }
            if (xPrefsSize == 0 || yPrefsSize == 0)
            {
                return(0.0);
            }

            int intersectionSize =
                xPrefsSize < yPrefsSize?yPrefs.IntersectionSize(xPrefs) : xPrefs.IntersectionSize(yPrefs);

            if (intersectionSize == 0)
            {
                return(Double.NaN);
            }

            int unionSize = xPrefsSize + yPrefsSize - intersectionSize;

            return((double)intersectionSize / (double)unionSize);
        }
        public void testStrategy()
        {
            FastIDSet allItemIDs = new FastIDSet();

            allItemIDs.AddAll(new long[] { 1L, 2L, 3L });

            FastIDSet preferredItemIDs = new FastIDSet(1);

            preferredItemIDs.Add(2L);

            var dataModelMock = new DynamicMock(typeof(IDataModel));

            dataModelMock.ExpectAndReturn("GetNumItems", 3);
            dataModelMock.ExpectAndReturn("GetItemIDs", allItemIDs.GetEnumerator());

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(new List <IPreference>()
            {
                new GenericPreference(123L, 2L, 1.0f)
            });

            ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

            //EasyMock.replay(dataModel);


            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)dataModelMock.MockInstance);

            Assert.AreEqual(2, candidateItems.Count());
            Assert.True(candidateItems.Contains(1L));
            Assert.True(candidateItems.Contains(3L));

            dataModelMock.Verify();
            //EasyMock.verify(dataModel);
        }
예제 #4
0
 public void testClear()
 {
     FastIDSet set = new FastIDSet();
     set.Add(1);
     set.Clear();
     Assert.AreEqual(0, set.Count());
     Assert.True(set.IsEmpty());
     Assert.False(set.Contains(1));
 }
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel dataModel        = getDataModel();
            FastIDSet  prefs1           = dataModel.GetItemIDsFromUser(userID1);
            FastIDSet  prefs2           = dataModel.GetItemIDsFromUser(userID2);
            int        prefs1Size       = prefs1.Count();
            int        prefs2Size       = prefs2.Count();
            int        intersectionSize = prefs1Size < prefs2Size?prefs2.IntersectionSize(prefs1) : prefs1.IntersectionSize(prefs2);

            return(doSimilarity(prefs1Size, prefs2Size, intersectionSize));
        }
        protected override void prepareTraining()
        {
            base.prepareTraining();
            var random = RandomUtils.getRandom();

            p = new double[dataModel.GetNumUsers()][];
            for (int i = 0; i < p.Length; i++)
            {
                p[i] = new double[numFeatures];
                for (int feature = 0; feature < FEATURE_OFFSET; feature++)
                {
                    p[i][feature] = 0;
                }
                for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++)
                {
                    p[i][feature] = random.nextGaussian() * randomNoise;
                }
            }

            y = new double[dataModel.GetNumItems()][];
            for (int i = 0; i < y.Length; i++)
            {
                y[i] = new double[numFeatures];
                for (int feature = 0; feature < FEATURE_OFFSET; feature++)
                {
                    y[i][feature] = 0;
                }
                for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++)
                {
                    y[i][feature] = random.nextGaussian() * randomNoise;
                }
            }

            /// get internal item IDs which we will need several times
            itemsByUser = new Dictionary <int, List <int> >();
            var userIDs = dataModel.GetUserIDs();

            while (userIDs.MoveNext())
            {
                long       userId          = userIDs.Current;
                int        userIdx         = userIndex(userId);
                FastIDSet  itemIDsFromUser = dataModel.GetItemIDsFromUser(userId);
                List <int> itemIndexes     = new List <int>(itemIDsFromUser.Count());
                itemsByUser[userIdx] = itemIndexes;
                foreach (long itemID2 in itemIDsFromUser)
                {
                    int i2 = itemIndex(itemID2);
                    itemIndexes.Add(i2);
                }
            }
        }
 public FastIDSet GetRelevantItemsIDs(long userID,
                                      int at,
                                      double relevanceThreshold,
                                      IDataModel dataModel) {
   IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
   FastIDSet relevantItemIDs = new FastIDSet(at);
   prefs.SortByValueReversed();
   for (int i = 0; i < prefs.Length() && relevantItemIDs.Count() < at; i++) {
     if (prefs.GetValue(i) >= relevanceThreshold) {
       relevantItemIDs.Add(prefs.GetItemID(i));
     }
   }
   return relevantItemIDs;
 }
예제 #8
0
 private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
 {
     if (itemIDs.Count() > maxItemsPerUser)
     {
         var it =
             new SamplinglongPrimitiveIterator(itemIDs.GetEnumerator(), (double)maxItemsPerUser / itemIDs.Count());
         while (it.MoveNext())
         {
             possibleItemIDs.Add(it.Current);
         }
     }
     else
     {
         possibleItemIDs.AddAll(itemIDs);
     }
 }
예제 #9
0
        public FastIDSet GetRelevantItemsIDs(long userID,
                                             int at,
                                             double relevanceThreshold,
                                             IDataModel dataModel)
        {
            IPreferenceArray prefs           = dataModel.GetPreferencesFromUser(userID);
            FastIDSet        relevantItemIDs = new FastIDSet(at);

            prefs.SortByValueReversed();
            for (int i = 0; i < prefs.Length() && relevantItemIDs.Count() < at; i++)
            {
                if (prefs.GetValue(i) >= relevanceThreshold)
                {
                    relevantItemIDs.Add(prefs.GetItemID(i));
                }
            }
            return(relevantItemIDs);
        }
        public override int GetNumUsersWithPreferenceFor(long itemID1, long itemID2)
        {
            FastIDSet userIDs1 = preferenceForItems.Get(itemID1);

            if (userIDs1 == null)
            {
                return(0);
            }
            FastIDSet userIDs2 = preferenceForItems.Get(itemID2);

            if (userIDs2 == null)
            {
                return(0);
            }
            return(userIDs1.Count() < userIDs2.Count()
        ? userIDs2.IntersectionSize(userIDs1)
        : userIDs1.IntersectionSize(userIDs2));
        }
        public override IPreferenceArray GetPreferencesForItem(long itemID)
        {
            FastIDSet userIDs = preferenceForItems.Get(itemID);

            if (userIDs == null)
            {
                throw new NoSuchItemException(itemID);
            }
            IPreferenceArray prefArray = new BooleanItemPreferenceArray(userIDs.Count());
            int i  = 0;
            var it = userIDs.GetEnumerator();

            while (it.MoveNext())
            {
                prefArray.SetUserID(i, it.Current);
                prefArray.SetItemID(i, itemID);
                i++;
            }
            return(prefArray);
        }
        /// @throws NoSuchUserException
        ///           if there is no such user
        public override IPreferenceArray GetPreferencesFromUser(long userID)
        {
            FastIDSet itemIDs = preferenceFromUsers.Get(userID);

            if (itemIDs == null)
            {
                throw new NoSuchUserException(userID);
            }
            IPreferenceArray prefArray = new BooleanUserPreferenceArray(itemIDs.Count());
            int i  = 0;
            var it = itemIDs.GetEnumerator();

            while (it.MoveNext())
            {
                prefArray.SetUserID(i, userID);
                prefArray.SetItemID(i, it.Current);
                i++;
            }
            return(prefArray);
        }
        public void testStrategy()
        {
            FastIDSet itemIDsFromUser123 = new FastIDSet();

            itemIDsFromUser123.Add(1L);

            FastIDSet itemIDsFromUser456 = new FastIDSet();

            itemIDsFromUser456.Add(1L);
            itemIDsFromUser456.Add(2L);

            List <IPreference> prefs = new List <IPreference>();

            prefs.Add(new GenericPreference(123L, 1L, 1.0f));
            prefs.Add(new GenericPreference(456L, 1L, 1.0f));
            IPreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(prefs);

            var dataModelMock = new DynamicMock(typeof(IDataModel));

            dataModelMock.ExpectAndReturn("GetPreferencesForItem", preferencesForItem1, (1L));
            dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemIDsFromUser123, (123L));
            dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemIDsFromUser456, (456L));

            IPreferenceArray prefArrayOfUser123 =
                new GenericUserPreferenceArray(new List <IPreference>()
            {
                new GenericPreference(123L, 1L, 1.0f)
            });

            ICandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();

            //EasyMock.replay(dataModel);

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)dataModelMock.MockInstance);

            Assert.AreEqual(1, candidateItems.Count());
            Assert.True(candidateItems.Contains(2L));

            dataModelMock.Verify(); //  EasyMock.verify(dataModel);
        }
예제 #14
0
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel dataModel = getDataModel();
            FastIDSet  prefs1    = dataModel.GetItemIDsFromUser(userID1);
            FastIDSet  prefs2    = dataModel.GetItemIDsFromUser(userID2);

            long prefs1Size       = prefs1.Count();
            long prefs2Size       = prefs2.Count();
            long intersectionSize =
                prefs1Size < prefs2Size?prefs2.IntersectionSize(prefs1) : prefs1.IntersectionSize(prefs2);

            if (intersectionSize == 0)
            {
                return(Double.NaN);
            }
            long   numItems      = dataModel.GetNumItems();
            double logLikelihood =
                LogLikelihood.logLikelihoodRatio(intersectionSize,
                                                 prefs2Size - intersectionSize,
                                                 prefs1Size - intersectionSize,
                                                 numItems - prefs1Size - prefs2Size + intersectionSize);

            return(1.0 - 1.0 / (1.0 + logLikelihood));
        }
예제 #15
0
 public void testSizeEmpty()
 {
     FastIDSet set = new FastIDSet();
     Assert.AreEqual(0, set.Count());
     Assert.True(set.IsEmpty());
     set.Add(1);
     Assert.AreEqual(1, set.Count());
     Assert.False(set.IsEmpty());
     set.Remove(1);
     Assert.AreEqual(0, set.Count());
     Assert.True(set.IsEmpty());
 }
        public override int GetNumUsersWithPreferenceFor(long itemID)
        {
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);

            return(userIDs1 == null ? 0 : userIDs1.Count());
        }
예제 #17
0
        public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                                     IDataModelBuilder dataModelBuilder,
                                     IDataModel dataModel,
                                     IDRescorer rescorer,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
            //Preconditions.checkArgument(dataModel != null, "dataModel is null");
            //Preconditions.checkArgument(at >= 1, "at must be at least 1");
            //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

            int             numItems  = dataModel.GetNumItems();
            IRunningAverage precision = new FullRunningAverage();
            IRunningAverage recall    = new FullRunningAverage();
            IRunningAverage fallOut   = new FullRunningAverage();
            IRunningAverage nDCG      = new FullRunningAverage();
            int             numUsersRecommendedFor      = 0;
            int             numUsersWithRecommendations = 0;

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;

                if (random.nextDouble() >= evaluationPercentage)
                {
                    // Skipped
                    continue;
                }

                var stopWatch = new System.Diagnostics.Stopwatch();
                stopWatch.Start();

                IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

                // List some most-preferred items that would count as (most) "relevant" results
                double    theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
                FastIDSet relevantItemIDs       = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

                int numRelevantItems = relevantItemIDs.Count();
                if (numRelevantItems <= 0)
                {
                    continue;
                }

                FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
                var it2 = dataModel.GetUserIDs();
                while (it2.MoveNext())
                {
                    dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
                }

                IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
                try {
                    trainingModel.GetPreferencesFromUser(userID);
                } catch (NoSuchUserException nsee) {
                    continue; // Oops we excluded all prefs for the user -- just move on
                }

                int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
                if (size < 2 * at)
                {
                    // Really not enough prefs to meaningfully evaluate this user
                    continue;
                }

                IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

                int intersectionSize = 0;
                var recommendedItems = recommender.Recommend(userID, at, rescorer);
                foreach (IRecommendedItem recommendedItem in recommendedItems)
                {
                    if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
                    {
                        intersectionSize++;
                    }
                }

                int numRecommendedItems = recommendedItems.Count;

                // Precision
                if (numRecommendedItems > 0)
                {
                    precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
                }

                // Recall
                recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

                // Fall-out
                if (numRelevantItems < size)
                {
                    fallOut.AddDatum((double)(numRecommendedItems - intersectionSize)
                                     / (double)(numItems - numRelevantItems));
                }

                // nDCG
                // In computing, assume relevant IDs have relevance 1 and others 0
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int i = 0; i < numRecommendedItems; i++)
                {
                    IRecommendedItem item     = recommendedItems[i];
                    double           discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
                    if (relevantItemIDs.Contains(item.GetItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

                    // Ideally results would be ordered with all relevant ones first, so this theoretical
                    // ideal list starts with number of relevant items equal to the total number of relevant items
                    if (i < numRelevantItems)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.AddDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                numUsersRecommendedFor++;
                if (numRecommendedItems > 0)
                {
                    numUsersWithRecommendations++;
                }

                stopWatch.Stop();

                log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
                log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
                         precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                         (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
            }

            return(new IRStatisticsImpl(
                       precision.GetAverage(),
                       recall.GetAverage(),
                       fallOut.GetAverage(),
                       nDCG.GetAverage(),
                       (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
        }
 private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
   if (itemIDs.Count() > maxItemsPerUser) {
     var it =
         new SamplinglongPrimitiveIterator(itemIDs.GetEnumerator(), (double) maxItemsPerUser / itemIDs.Count() );
     while (it.MoveNext()) {
       possibleItemIDs.Add(it.Current);
     }
   } else {
     possibleItemIDs.AddAll(itemIDs);
   }
 }
예제 #19
0
 public void testVersusHashSet()
 {
     FastIDSet actual = new FastIDSet(1);
     var expected = new HashSet<int>(); //1000000
     var r = RandomUtils.getRandom();
     for (int i = 0; i < 1000000; i++) {
       double d = r.nextDouble();
       var key = r.nextInt(100);
       if (d < 0.4) {
     Assert.AreEqual(expected.Contains(key), actual.Contains(key));
       } else {
     if (d < 0.7) {
       Assert.AreEqual(expected.Add(key), actual.Add(key));
     } else {
       Assert.AreEqual(expected.Remove(key), actual.Remove(key));
     }
     Assert.AreEqual(expected.Count, actual.Count() );
     Assert.AreEqual(expected.Count==0, actual.IsEmpty());
       }
     }
 }