public void persistAndLoad()
        {
            // Round-trips a small factorization through a file-backed persistence strategy
            // and checks that the reloaded instance equals the one that was written.
            var users = new FastByIDMap<int?>();
            users.Put(123, 0);
            users.Put(456, 1);

            var items = new FastByIDMap<int?>();
            items.Put(12, 0);
            items.Put(34, 1);

            double[][] userFeatures =
            {
                new[] { 0.1, 0.2, 0.3 },
                new[] { 0.4, 0.5, 0.6 }
            };
            double[][] itemFeatures =
            {
                new[] { 0.7, 0.8, 0.9 },
                new[] { 1.0, 1.1, 1.2 }
            };

            Factorization original = new Factorization(users, items, userFeatures, itemFeatures);
            string storage = Path.Combine(Path.GetTempPath(), "storage.bin");

            try
            {
                IPersistenceStrategy strategy = new FilePersistenceStrategy(storage);

                // Nothing has been persisted yet, so loading is expected to yield null.
                Assert.IsNull(strategy.Load());

                strategy.MaybePersist(original);
                Factorization reloaded = strategy.Load();

                Assert.True(original.Equals(reloaded));
            }
            finally
            {
                // Best-effort cleanup: a failure to delete must not mask the test outcome.
                if (File.Exists(storage))
                {
                    try { File.Delete(storage); } catch { }
                }
            }
        }
Exemplo n.º 2
0
        public void setUpToyData()
        {
            // Configures the factorizer parameters and builds a 4-user / 4-item toy data model.
            this.rank          = 3;
            this.lambda        = 0.01;
            this.numIterations = 1000;

            var prefsByUser = new FastByIDMap<IPreferenceArray>();

            var firstUser = new List<IPreference>
            {
                new GenericPreference(1L, 1L, 5.0f),
                new GenericPreference(1L, 2L, 5.0f),
                new GenericPreference(1L, 3L, 2.0f)
            };
            prefsByUser.Put(1L, new GenericUserPreferenceArray(firstUser));

            var secondUser = new List<IPreference>
            {
                new GenericPreference(2L, 1L, 2.0f),
                new GenericPreference(2L, 3L, 3.0f),
                new GenericPreference(2L, 4L, 5.0f)
            };
            prefsByUser.Put(2L, new GenericUserPreferenceArray(secondUser));

            var thirdUser = new List<IPreference>
            {
                new GenericPreference(3L, 2L, 5.0f),
                new GenericPreference(3L, 4L, 3.0f)
            };
            prefsByUser.Put(3L, new GenericUserPreferenceArray(thirdUser));

            var fourthUser = new List<IPreference>
            {
                new GenericPreference(4L, 1L, 3.0f),
                new GenericPreference(4L, 4L, 5.0f)
            };
            prefsByUser.Put(4L, new GenericUserPreferenceArray(fourthUser));

            dataModel = new GenericDataModel(prefsByUser);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Drains <paramref name="similarities"/> into the nested similarity map and the similar-item index.
 /// Each pair is stored once, keyed by the smaller item ID first; self-pairs are skipped.
 /// </summary>
 /// <param name="similarities">Enumerator over the item-item similarities to store.</param>
 private void initSimilarityMaps(IEnumerator <ItemItemSimilarity> similarities)
 {
     while (similarities.MoveNext())
     {
         ItemItemSimilarity pair = similarities.Current;
         long idA = pair.getItemID1();
         long idB = pair.getItemID2();
         if (idA == idB)
         {
             // An item paired with itself is not stored.
             continue;
         }

         // Normalise the key order so (a, b) and (b, a) land in the same slot.
         bool aIsSmaller = idA < idB;
         long lowID      = aIsSmaller ? idA : idB;
         long highID     = aIsSmaller ? idB : idA;

         FastByIDMap <double?> row = this.similarityMaps.get(lowID);
         if (row == null)
         {
             row = new FastByIDMap <double?>();
             this.similarityMaps.put(lowID, row);
         }
         row.put(highID, (double?)pair.getValue());

         // Index both directions.
         this.doIndex(lowID, highID);
         this.doIndex(highID, lowID);
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Builds this instance by feeding every item pair from <paramref name="dataModel"/>, scored by
 /// <paramref name="otherSimilarity"/>, into the internal similarity maps.
 /// </summary>
 /// <param name="otherSimilarity">Similarity implementation handed to the pair iterator.</param>
 /// <param name="dataModel">Source of the item IDs to pair up.</param>
 public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel)
 {
     similarityMaps      = new FastByIDMap <FastByIDMap <double?> >();
     similarItemIDsIndex = new FastByIDMap <FastIDSet>();

     long[] allItemIDs = GenericUserSimilarity.longIteratorToList(dataModel.getItemIDs());
     var    pairs      = new DataModelSimilaritiesIterator(otherSimilarity, allItemIDs);
     initSimilarityMaps(pairs);
 }
Exemplo n.º 5
0
        private void buildClusters()
        {
            // Rebuilds the cluster state (clusters, per-user clusters, per-user top recs)
            // from the current data model.
            DataModel model     = getDataModel();
            int       userCount = model.getNumUsers();

            if (userCount <= 0)
            {
                // No users: publish empty structures.
                topRecsByUserID  = new FastByIDMap <List <RecommendedItem> >();
                clustersByUserID = new FastByIDMap <FastIDSet>();
                allClusters      = NO_CLUSTERS;
                return;
            }

            // Seed with one singleton cluster per user.
            List <FastIDSet> clusters = new List <FastIDSet>();
            var userIDs = model.getUserIDs();
            while (userIDs.MoveNext())
            {
                FastIDSet singleton = new FastIDSet();
                singleton.add(userIDs.Current);
                clusters.Add(singleton);
            }

            // findClusters is only invoked when there is more than one user.
            if (userCount > 1)
            {
                findClusters(clusters);
            }

            topRecsByUserID  = computeTopRecsPerUserID(clusters);
            clustersByUserID = computeClustersPerUserID(clusters);
            allClusters      = clusters.ToArray();
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds the model from per-user preference arrays: sorts each user's preferences by item,
        /// derives the per-item view, records the min/max preference values and the sorted ID arrays.
        /// </summary>
        /// <param name="userData">Preferences keyed by user ID; each array is sorted by item in place.</param>
        /// <param name="timestamps">Stored as-is; not inspected here.</param>
        public GenericDataModel(FastByIDMap <PreferenceArray> userData, FastByIDMap <FastByIDMap <DateTime?> > timestamps)
        {
            this.preferenceFromUsers = userData;

            FastByIDMap <List <Preference> > prefsByItem = new FastByIDMap <List <Preference> >();
            FastIDSet seenItemIDs    = new FastIDSet();
            float     maxPrefValue   = float.NegativeInfinity;
            float     minPrefValue   = float.PositiveInfinity;
            int       usersProcessed = 0;

            foreach (KeyValuePair <long, PreferenceArray> userEntry in this.preferenceFromUsers.entrySet())
            {
                PreferenceArray userPrefs = userEntry.Value;
                userPrefs.sortByItem();

                foreach (Preference pref in userPrefs)
                {
                    long itemID = pref.getItemID();
                    seenItemIDs.add(itemID);

                    List <Preference> itemPrefs = prefsByItem.get(itemID);
                    if (itemPrefs == null)
                    {
                        itemPrefs = new List <Preference>(2);
                        prefsByItem.put(itemID, itemPrefs);
                    }
                    itemPrefs.Add(pref);

                    // Track the observed value range.
                    float value = pref.getValue();
                    if (value > maxPrefValue)
                    {
                        maxPrefValue = value;
                    }
                    if (value < minPrefValue)
                    {
                        minPrefValue = value;
                    }
                }

                // Progress log every 10000 users.
                usersProcessed++;
                if (usersProcessed % 10000 == 0)
                {
                    log.info("Processed {0} users", new object[] { usersProcessed });
                }
            }
            log.info("Processed {0} users", new object[] { usersProcessed });

            this.setMinPreference(minPrefValue);
            this.setMaxPreference(maxPrefValue);

            this.itemIDs = seenItemIDs.toArray();
            Array.Sort <long>(this.itemIDs);

            this.preferenceForItems = toDataMap(prefsByItem, false);
            foreach (KeyValuePair <long, PreferenceArray> itemEntry in this.preferenceForItems.entrySet())
            {
                itemEntry.Value.sortByUser();
            }

            // Copy the user IDs into a sorted array.
            this.userIDs = new long[userData.size()];
            int next = 0;
            foreach (long userID in userData.Keys)
            {
                this.userIDs[next] = userID;
                next++;
            }
            Array.Sort <long>(this.userIDs);

            this.timestamps = timestamps;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Looks up the stored similarity between two items; the lookup is order-independent.
        /// </summary>
        /// <returns>1.0 when both IDs are equal; the stored value if present; otherwise NaN.</returns>
        public double itemSimilarity(long itemID1, long itemID2)
        {
            if (itemID1 == itemID2)
            {
                return 1.0;
            }

            // The outer map is keyed by the smaller ID, the inner map by the larger one.
            bool firstIsSmaller = itemID1 < itemID2;
            long lowID          = firstIsSmaller ? itemID1 : itemID2;
            long highID         = firstIsSmaller ? itemID2 : itemID1;

            FastByIDMap <double?> row = this.similarityMaps.get(lowID);
            if (row == null)
            {
                return double.NaN;
            }

            double? stored = row.get(highID);
            return stored ?? double.NaN;
        }
        private void buildClusters()
        {
            // Rebuilds the cluster state from the current data model by repeatedly merging
            // clusters until mergeClosestClusters reports completion.
            DataModel model     = getDataModel();
            int       userCount = model.getNumUsers();

            if (userCount == 0)
            {
                // No users: only the two per-user maps are reset here.
                topRecsByUserID  = new FastByIDMap <List <RecommendedItem> >();
                clustersByUserID = new FastByIDMap <FastIDSet>();
                return;
            }

            // Seed with one singleton cluster per user.
            List <FastIDSet> current = new List <FastIDSet>();
            var userIDs = model.getUserIDs();
            while (userIDs.MoveNext())
            {
                FastIDSet singleton = new FastIDSet();
                singleton.add(userIDs.Current);
                current.Add(singleton);
            }

            // At least one merge attempt is always made.
            bool finished = false;
            do
            {
                finished = mergeClosestClusters(userCount, current, finished);
            }
            while (!finished);

            topRecsByUserID  = computeTopRecsPerUserID(current);
            clustersByUserID = computeClustersPerUserID(current);
            allClusters      = current.ToArray();
        }
Exemplo n.º 9
0
        /// <summary>
        /// Adds <paramref name="otherUserID"/>'s preferences to <paramref name="trainingUsers"/>.
        /// For the user under evaluation, the relevant (held-out) items are filtered out first,
        /// and the user is omitted entirely if nothing remains.
        /// </summary>
        /// <param name="userID">User being evaluated.</param>
        /// <param name="relevantItemIDs">Item IDs to withhold from the evaluated user's training data.</param>
        /// <param name="trainingUsers">Map that receives the training preferences.</param>
        /// <param name="otherUserID">User whose preferences are being processed.</param>
        /// <param name="dataModel">Source of the preferences.</param>
        public void ProcessOtherUser(long userID,
                                     FastIDSet relevantItemIDs,
                                     FastByIDMap <IPreferenceArray> trainingUsers,
                                     long otherUserID,
                                     IDataModel dataModel)
        {
            IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(otherUserID);

            if (userID != otherUserID)
            {
                // Any other user contributes all of their preferences unchanged.
                trainingUsers.Put(otherUserID, allPrefs);
                return;
            }

            // The evaluated user: keep only preferences for items that are not held out.
            List <IPreference> kept = new List <IPreference>(allPrefs.Length());
            foreach (IPreference candidate in allPrefs)
            {
                bool heldOut = relevantItemIDs.Contains(candidate.GetItemID());
                if (!heldOut)
                {
                    kept.Add(candidate);
                }
            }

            if (kept.Count > 0)
            {
                trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
            }
        }
        public void testGetUserIDs()
        {
            // Verifies that GetUserIDs() yields exactly the single delegate user,
            // even after an anonymous user has been taken and given temporary preferences.
            IPreferenceArray prefs        = new GenericUserPreferenceArray(1);
            long             sampleUserID = 1;

            prefs.SetUserID(0, sampleUserID);
            long sampleItemID = 11;

            prefs.SetItemID(0, sampleItemID);

            FastByIDMap <IPreferenceArray> delegatePreferences = new FastByIDMap <IPreferenceArray>();

            delegatePreferences.Put(sampleUserID, prefs);

            PlusAnonymousConcurrentUserDataModel instance = getTestableWithDelegateData(10, delegatePreferences);

            long anonymousUserID = instance.TakeAvailableUser().Value;

            IPreferenceArray tempPrefs = new GenericUserPreferenceArray(1);

            tempPrefs.SetUserID(0, anonymousUserID);
            tempPrefs.SetItemID(0, 22);

            instance.SetTempPrefs(tempPrefs, anonymousUserID);

            var userIDs = instance.GetUserIDs();

            // Assert the advance succeeded: reading Current from an enumerator that failed to
            // move would otherwise produce a misleading equality failure.
            Assert.True(userIDs.MoveNext());

            Assert.AreEqual(sampleUserID, userIDs.Current);
            Assert.False(userIDs.MoveNext());
        }
Exemplo n.º 11
0
        private void pruneInconsequentialDiffs()
        {
            // Drops every average backed by at most one data point (possibly unreliable),
            // then removes inner maps that became empty and rehashes what is left.
            // Both levels are iterated over ToList() snapshots so removal during the walk is safe.
            foreach (var outerEntry in averageDiffs.entrySet().ToList())
            {
                FastByIDMap <RunningAverage> diffsForItem = outerEntry.Value;

                foreach (var innerEntry in diffsForItem.entrySet().ToList())
                {
                    RunningAverage diff = innerEntry.Value;
                    if (diff.getCount() <= 1)
                    {
                        diffsForItem.remove(innerEntry.Key);
                    }
                }

                if (!diffsForItem.isEmpty())
                {
                    diffsForItem.rehash();
                }
                else
                {
                    averageDiffs.remove(outerEntry.Key);
                }
            }

            averageDiffs.rehash();
        }
Exemplo n.º 12
0
        /// <summary>
        /// Removes the contribution of one preference (<paramref name="userID"/>'s value
        /// <paramref name="prefValue"/> for <paramref name="itemIDA"/>) from the stored average diffs
        /// against every other item that user has a preference for.
        /// </summary>
        /// <param name="userID">User whose preference is being removed.</param>
        /// <param name="itemIDA">Item whose preference is being removed.</param>
        /// <param name="prefValue">The preference value being removed.</param>
        public void removeItemPref(long userID, long itemIDA, float prefValue)
        {
            PreferenceArray userPreferences = dataModel.getPreferencesFromUser(userID);

            // Acquire the lock before entering the try block: if acquisition itself throws,
            // the finally block must not attempt to release a lock that was never taken.
            buildAverageDiffsLock.EnterWriteLock();
            try
            {
                FastByIDMap <RunningAverage> aMap = averageDiffs.get(itemIDA);

                int length = userPreferences.length();
                for (int i = 0; i < length; i++)
                {
                    long  itemIDB = userPreferences.getItemID(i);
                    float bValue  = userPreferences.getValue(i);

                    if (itemIDA < itemIDB)
                    {
                        // Diff is stored under (itemIDA -> itemIDB).
                        if (aMap != null)
                        {
                            RunningAverage average = aMap.get(itemIDB);
                            if (average != null)
                            {
                                if (average.getCount() <= 1)
                                {
                                    aMap.remove(itemIDB);
                                }
                                else
                                {
                                    average.removeDatum(bValue - prefValue);
                                }
                            }
                        }
                    }
                    else if (itemIDA > itemIDB)
                    {
                        // Diff is stored under (itemIDB -> itemIDA).
                        FastByIDMap <RunningAverage> bMap = averageDiffs.get(itemIDB);
                        if (bMap != null)
                        {
                            RunningAverage average = bMap.get(itemIDA);
                            if (average != null)
                            {
                                if (average.getCount() <= 1)
                                {
                                    // The entry was found in bMap, so it must be removed from bMap.
                                    // (Removing from aMap targeted the wrong map and threw when aMap was null.)
                                    bMap.remove(itemIDA);
                                }
                                else
                                {
                                    average.removeDatum(prefValue - bValue);
                                }
                            }
                        }
                    }
                    // itemIDA == itemIDB: nothing to update for an item against itself.
                }
            }
            finally
            {
                buildAverageDiffsLock.ExitWriteLock();
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Returns the similarity between two items. Similarity is treated as symmetric, so
        /// <c>ItemSimilarity(item1, item2) == ItemSimilarity(item2, item1)</c>, and
        /// <c>ItemSimilarity(item1, item1) == 1.0</c> for all items.
        /// </summary>
        /// <param name="itemID1">first item</param>
        /// <param name="itemID2">second item</param>
        /// <returns>similarity between the two, or NaN when no value is stored for the pair</returns>
        public double ItemSimilarity(long itemID1, long itemID2)
        {
            if (itemID1 == itemID2)
            {
                return 1.0;
            }

            // The outer map is keyed by the smaller ID, the inner map by the larger one.
            bool firstIsSmaller = itemID1 < itemID2;
            long outerKey       = firstIsSmaller ? itemID1 : itemID2;
            long innerKey       = firstIsSmaller ? itemID2 : itemID1;

            FastByIDMap <double?> row = similarityMaps.Get(outerKey);
            if (row == null)
            {
                return Double.NaN;
            }

            double? stored = row.Get(innerKey);
            if (stored.HasValue)
            {
                return stored.Value;
            }
            return Double.NaN;
        }
        public void testStrategy()
        {
            // Three users over three items; the strategy under test is asked for candidates for user 123.
            var prefsOfUser123 = new List<IPreference>
            {
                new GenericPreference(123L, 1L, 1.0f)
            };
            var prefsOfUser456 = new List<IPreference>
            {
                new GenericPreference(456L, 1L, 1.0f),
                new GenericPreference(456L, 2L, 1.0f)
            };
            var prefsOfUser789 = new List<IPreference>
            {
                new GenericPreference(789L, 1L, 0.5f),
                new GenericPreference(789L, 3L, 1.0f)
            };

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

            var userData = new FastByIDMap<IPreferenceArray>();
            userData.Put(123L, prefArrayOfUser123);
            userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
            userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));

            IDataModel dataModel = new GenericDataModel(userData);

            int numUsers = dataModel.GetNumUsers();
            int numItems = dataModel.GetNumItems();
            ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, numUsers, numItems);

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

            // At most one candidate, and never item 1, which user 123 already has a preference for.
            Assert.True(candidateItems.Count() <= 1);
            Assert.False(candidateItems.Contains(1L));
        }
Exemplo n.º 15
0
        public override void SetUp()
        {
            // Builds a 4-user / 4-item toy data model and the factorizer under test.
            base.SetUp();

            var prefsByUser = new FastByIDMap <IPreferenceArray>();

            var firstUser = new List <IPreference>
            {
                new GenericPreference(1L, 1L, 5.0f),
                new GenericPreference(1L, 2L, 5.0f),
                new GenericPreference(1L, 3L, 2.0f)
            };
            prefsByUser.Put(1L, new GenericUserPreferenceArray(firstUser));

            var secondUser = new List <IPreference>
            {
                new GenericPreference(2L, 1L, 2.0f),
                new GenericPreference(2L, 3L, 3.0f),
                new GenericPreference(2L, 4L, 5.0f)
            };
            prefsByUser.Put(2L, new GenericUserPreferenceArray(secondUser));

            var thirdUser = new List <IPreference>
            {
                new GenericPreference(3L, 2L, 5.0f),
                new GenericPreference(3L, 4L, 3.0f)
            };
            prefsByUser.Put(3L, new GenericUserPreferenceArray(thirdUser));

            var fourthUser = new List <IPreference>
            {
                new GenericPreference(4L, 1L, 3.0f),
                new GenericPreference(4L, 4L, 5.0f)
            };
            prefsByUser.Put(4L, new GenericUserPreferenceArray(fourthUser));

            dataModel  = new GenericDataModel(prefsByUser);
            factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
        }
Exemplo n.º 16
0
        public void setUpSyntheticData()
        {
            // Generates a synthetic rating matrix (users x items) as a product of two random
            // matrices and keeps each cell as a preference when the random draw is <= sparsity.
            int    userCount = 2000;
            int    itemCount = 1000;
            double sparsity  = 0.5;

            this.rank          = 20;
            this.lambda        = 0.000000001;
            this.numIterations = 100;

            var userFactors = randomMatrix(userCount, rank, 1);
            var itemFactors = randomMatrix(rank, itemCount, 1);
            var ratings     = times(userFactors, itemFactors);

            normalize(ratings, 5);

            var prefsByUser = new FastByIDMap <IPreferenceArray>();

            for (int u = 0; u < userCount; u++)
            {
                var sampled = new List <IPreference>();

                for (int i = 0; i < itemCount; i++)
                {
                    bool keep = random.nextDouble() <= sparsity;
                    if (keep)
                    {
                        float rating = (float)ratings[u, i];
                        sampled.Add(new GenericPreference(u, i, rating));
                    }
                }

                prefsByUser.Put(u, new GenericUserPreferenceArray(sampled));
            }

            dataModel = new GenericDataModel(prefsByUser);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Groups <paramref name="userItems"/> by user and wraps them in a <see cref="GenericDataModel"/>.
        /// </summary>
        /// <param name="userItems">User/item records to convert.</param>
        /// <param name="isReviewBased">
        /// When true, each record becomes a rated preference; otherwise a boolean (rating-less) preference.
        /// </param>
        /// <returns>A data model holding one preference array per user.</returns>
        internal static IDataModel BuildModel(IList <UserItem> userItems, bool isReviewBased)
        {
            // Pass 1: bucket the preferences by user ID.
            var prefsByUser = new FastByIDMap <IList <IPreference> >();

            foreach (var item in userItems)
            {
                IList <IPreference> bucket = prefsByUser.Get(item.UserId);
                if (bucket == null)
                {
                    bucket = new List <IPreference>(3);
                    prefsByUser.Put(item.UserId, bucket);
                }

                IPreference pref;
                if (isReviewBased)
                {
                    pref = new GenericPreference(item.UserId, item.ItemId, item.Rating);
                }
                else
                {
                    pref = new BooleanPreference(item.UserId, item.ItemId);
                }
                bucket.Add(pref);
            }

            // Pass 2: turn each bucket into the matching preference-array type.
            var arraysByUser = new FastByIDMap <IPreferenceArray>(prefsByUser.Count());

            foreach (var entry in prefsByUser.EntrySet())
            {
                // Every bucket was created above as a List<IPreference>, so the cast holds.
                var bucket = (List <IPreference>)entry.Value;

                IPreferenceArray prefArray;
                if (isReviewBased)
                {
                    prefArray = new GenericUserPreferenceArray(bucket);
                }
                else
                {
                    prefArray = new BooleanUserPreferenceArray(bucket);
                }
                arraysByUser.Put(entry.Key, prefArray);
            }

            return new GenericDataModel(arraysByUser);
        }
Exemplo n.º 18
0
 /// <summary>
 /// Stores the given ID mappings and feature matrices; the arguments are kept by reference, not copied.
 /// </summary>
 /// <param name="userIDMapping">Map from user ID to an integer value.</param>
 /// <param name="itemIDMapping">Map from item ID to an integer value.</param>
 /// <param name="userFeatures">User feature matrix.</param>
 /// <param name="itemFeatures">Item feature matrix.</param>
 public Factorization(FastByIDMap <int?> userIDMapping, FastByIDMap <int?> itemIDMapping, double[][] userFeatures, double[][] itemFeatures)
 {
     // User side.
     this.userIDMapping = userIDMapping;
     this.userFeatures  = userFeatures;

     // Item side.
     this.itemIDMapping = itemIDMapping;
     this.itemFeatures  = itemFeatures;
 }
Exemplo n.º 19
0
  /// <summary>
  /// Stores the given ID mappings and feature matrices; the arguments are kept by reference, not copied.
  /// No null checks are performed on any argument.
  /// </summary>
  /// <param name="userIDMapping">Map from user ID to an integer value.</param>
  /// <param name="itemIDMapping">Map from item ID to an integer value.</param>
  /// <param name="userFeatures">User feature matrix.</param>
  /// <param name="itemFeatures">Item feature matrix.</param>
  public Factorization(FastByIDMap<int?> userIDMapping, FastByIDMap<int?> itemIDMapping, double[][] userFeatures,
      double[][] itemFeatures)
  {
    // User side.
    this.userIDMapping = userIDMapping;
    this.userFeatures = userFeatures;

    // Item side.
    this.itemIDMapping = itemIDMapping;
    this.itemFeatures = itemFeatures;
  }
Exemplo n.º 20
0
        /// <summary>
        /// Looks up the stored similarity between two users; the lookup is order-independent.
        /// </summary>
        /// <returns>
        /// 1.0 when both IDs are equal; NaN when no inner map exists for the smaller ID;
        /// otherwise whatever the inner map returns for the larger ID.
        /// </returns>
        public double userSimilarity(long userID1, long userID2)
        {
            if (userID1 == userID2)
            {
                return 1.0;
            }

            // The outer map is keyed by the smaller ID, the inner map by the larger one.
            bool firstIsSmaller = userID1 < userID2;
            long lowID          = firstIsSmaller ? userID1 : userID2;
            long highID         = firstIsSmaller ? userID2 : userID1;

            FastByIDMap <double> row = this.similarityMaps.get(lowID);
            if (row == null)
            {
                return double.NaN;
            }

            // NOTE(review): the inner map holds non-nullable doubles, so a missing highID cannot be
            // told apart from a stored value here — confirm what get() yields for absent keys.
            return row.get(highID);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Builds a <see cref="GenericDataModel"/> with one preference array per user, where each
        /// preference is (user ID, book ID, rating) taken from the user's book reviews.
        /// </summary>
        /// <returns>The data model built from every review returned by the repository.</returns>
        public GenericDataModel GetUserBasedDataModel()
        {
            FastByIDMap <IPreferenceArray> data = new FastByIDMap <IPreferenceArray>();

            IEnumerable <UserBookReview> allBookReviews = _userBookReviewRepository.GetListOf();

            // Group once instead of re-scanning the whole review sequence for every user.
            // GroupBy yields groups in order of first key occurrence and keeps source order inside
            // each group, so users and their preferences are produced in the same order as before.
            foreach (var reviewsOfUser in allBookReviews.GroupBy(review => review.UserId))
            {
                // Explicit conversion mirrors the int-typed user ID used for every preference below.
                int userId = (int)reviewsOfUser.Key;

                List <IPreference> listOfPreferences = new List <IPreference>();

                foreach (UserBookReview review in reviewsOfUser)
                {
                    int rating = review.Rating;
                    int bookId = review.BookId;

                    // Argument order: user ID, item ID, preference value.
                    listOfPreferences.Add(new GenericPreference(userId, bookId, rating));
                }

                data.Put(userId, new GenericUserPreferenceArray(listOfPreferences));
            }

            return new GenericDataModel(data);
        }
Exemplo n.º 22
0
 /// <summary>
 /// Drains <paramref name="similarities"/> into the nested similarity map.
 /// Each pair is stored once, keyed by the smaller user ID first; self-pairs are skipped.
 /// </summary>
 /// <param name="similarities">Enumerator over the user-user similarities to store.</param>
 private void initSimilarityMaps(IEnumerator <UserUserSimilarity> similarities)
 {
     while (similarities.MoveNext())
     {
         UserUserSimilarity pair = similarities.Current;
         long idA = pair.getUserID1();
         long idB = pair.getUserID2();
         if (idA == idB)
         {
             // A user paired with itself is not stored.
             continue;
         }

         // Normalise the key order so (a, b) and (b, a) land in the same slot.
         bool aIsSmaller = idA < idB;
         long lowID      = aIsSmaller ? idA : idB;
         long highID     = aIsSmaller ? idB : idA;

         FastByIDMap <double> row = this.similarityMaps.get(lowID);
         if (row == null)
         {
             row = new FastByIDMap <double>();
             this.similarityMaps.put(lowID, row);
         }
         row.put(highID, pair.getValue());
     }
 }
        public void persistAndLoad()
        {
            // Round-trips a small factorization through a file-backed persistence strategy
            // and checks that the reloaded instance equals the one that was written.
            var users = new FastByIDMap <int?>();
            users.Put(123, 0);
            users.Put(456, 1);

            var items = new FastByIDMap <int?>();
            items.Put(12, 0);
            items.Put(34, 1);

            double[][] userFeatures =
            {
                new[] { 0.1, 0.2, 0.3 },
                new[] { 0.4, 0.5, 0.6 }
            };
            double[][] itemFeatures =
            {
                new[] { 0.7, 0.8, 0.9 },
                new[] { 1.0, 1.1, 1.2 }
            };

            Factorization original = new Factorization(users, items, userFeatures, itemFeatures);
            string        storage  = Path.Combine(Path.GetTempPath(), "storage.bin");

            try
            {
                IPersistenceStrategy strategy = new FilePersistenceStrategy(storage);

                // Nothing has been persisted yet, so loading is expected to yield null.
                Assert.IsNull(strategy.Load());

                strategy.MaybePersist(original);
                Factorization reloaded = strategy.Load();

                Assert.True(original.Equals(reloaded));
            }
            finally
            {
                // Best-effort cleanup: a failure to delete must not mask the test outcome.
                if (File.Exists(storage))
                {
                    try { File.Delete(storage); } catch { }
                }
            }
        }
Exemplo n.º 24
0
        public void testStrategy()
        {
            // Three users over three items; the strategy under test is asked for candidates for user 123.
            var prefsOfUser123 = new List <IPreference>
            {
                new GenericPreference(123L, 1L, 1.0f)
            };
            var prefsOfUser456 = new List <IPreference>
            {
                new GenericPreference(456L, 1L, 1.0f),
                new GenericPreference(456L, 2L, 1.0f)
            };
            var prefsOfUser789 = new List <IPreference>
            {
                new GenericPreference(789L, 1L, 0.5f),
                new GenericPreference(789L, 3L, 1.0f)
            };

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

            var userData = new FastByIDMap <IPreferenceArray>();
            userData.Put(123L, prefArrayOfUser123);
            userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
            userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));

            IDataModel dataModel = new GenericDataModel(userData);

            int numUsers = dataModel.GetNumUsers();
            int numItems = dataModel.GetNumItems();
            ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, numUsers, numItems);

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

            // At most one candidate, and never item 1, which user 123 already has a preference for.
            Assert.True(candidateItems.Count() <= 1);
            Assert.False(candidateItems.Contains(1L));
        }
        public void setUpSyntheticData()
        {
            // Generates a synthetic rating matrix (users x items) as a product of two random
            // matrices and keeps each cell as a preference when the random draw is <= sparsity.
            int    userCount = 2000;
            int    itemCount = 1000;
            double sparsity  = 0.5;

            this.rank          = 20;
            this.lambda        = 0.000000001;
            this.numIterations = 100;

            var userFactors = randomMatrix(userCount, rank, 1);
            var itemFactors = randomMatrix(rank, itemCount, 1);
            var ratings     = times(userFactors, itemFactors);

            normalize(ratings, 5);

            var prefsByUser = new FastByIDMap<IPreferenceArray>();

            for (int u = 0; u < userCount; u++)
            {
                var sampled = new List<IPreference>();

                for (int i = 0; i < itemCount; i++)
                {
                    bool keep = random.nextDouble() <= sparsity;
                    if (keep)
                    {
                        float rating = (float)ratings[u, i];
                        sampled.Add(new GenericPreference(u, i, rating));
                    }
                }

                prefsByUser.Put(u, new GenericUserPreferenceArray(sampled));
            }

            dataModel = new GenericDataModel(prefsByUser);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Drains <paramref name="similarities"/> into the nested similarity map.
 /// Each pair is stored once, keyed by the smaller user ID first.
 /// </summary>
 /// <param name="similarities">Enumerator over the user-user similarities to store.</param>
 private void initSimilarityMaps(IEnumerator <UserUserSimilarity> similarities)
 {
     while (similarities.MoveNext())
     {
         UserUserSimilarity pair = similarities.Current;
         long idA = pair.getUserID1();
         long idB = pair.getUserID2();
         if (idA == idB)
         {
             // Similarity between a user and itself is already assumed to be 1.0.
             continue;
         }

         // Order the keys: the outer key is the "smaller" ID.
         bool aIsSmaller = idA < idB;
         long outerKey   = aIsSmaller ? idA : idB;
         long innerKey   = aIsSmaller ? idB : idA;

         FastByIDMap <Double> row = similarityMaps.Get(outerKey);
         if (row == null)
         {
             row = new FastByIDMap <Double>();
             similarityMaps.Put(outerKey, row);
         }
         row.Put(innerKey, pair.getValue());
     }
 }
Exemplo n.º 27
0
        /// <summary>
        /// Looks up the stored similarity between two users; the lookup is order-independent.
        /// </summary>
        /// <returns>
        /// 1.0 when both IDs are equal; NaN when no inner map exists for the smaller ID;
        /// otherwise whatever the inner map returns for the larger ID.
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            if (userID1 == userID2)
            {
                return 1.0;
            }

            // The outer map is keyed by the smaller ID, the inner map by the larger one.
            bool firstIsSmaller = userID1 < userID2;
            long outerKey       = firstIsSmaller ? userID1 : userID2;
            long innerKey       = firstIsSmaller ? userID2 : userID1;

            FastByIDMap <Double> row = similarityMaps.Get(outerKey);
            if (row == null)
            {
                return Double.NaN;
            }

            // NOTE(review): the previous form compared this non-nullable Double against null, a check
            // that can never be true, so the looked-up value was always returned as-is. If NaN is
            // intended for absent keys, the map's value type would need to be nullable — confirm.
            return row.Get(innerKey);
        }
Exemplo n.º 28
0
 /// <summary>
 /// Stores the given ID mappings and feature matrices; the arguments are kept by reference, not copied.
 /// No null checks are performed on any argument.
 /// </summary>
 /// <param name="userIDMapping">Map from user ID to an integer value.</param>
 /// <param name="itemIDMapping">Map from item ID to an integer value.</param>
 /// <param name="userFeatures">User feature matrix.</param>
 /// <param name="itemFeatures">Item feature matrix.</param>
 public Factorization(FastByIDMap <int?> userIDMapping, FastByIDMap <int?> itemIDMapping, double[][] userFeatures,
                      double[][] itemFeatures)
 {
     // User side.
     this.userIDMapping = userIDMapping;
     this.userFeatures  = userFeatures;

     // Item side.
     this.itemIDMapping = itemIDMapping;
     this.itemFeatures  = itemFeatures;
 }
Exemplo n.º 29
0
        /// <summary>
        /// Builds this similarity from another one, evaluated over the users of <paramref name="dataModel"/>
        /// and reduced through <c>TopItems.getTopUserUserSimilarities</c> with <paramref name="maxToKeep"/>.
        /// </summary>
        /// <param name="otherSimilarity">similarity used to produce the user-user values</param>
        /// <param name="dataModel">source of the user IDs</param>
        /// <param name="maxToKeep">limit handed to <c>TopItems.getTopUserUserSimilarities</c></param>
        public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
        {
            this.similarityMaps = new FastByIDMap <FastByIDMap <double> >();

            // These are user IDs (they come from getUserIDs()).
            long[] allUserIDs = longIteratorToList(dataModel.getUserIDs());
            IEnumerator <UserUserSimilarity> pairwise = new DataModelSimilaritiesIterator(otherSimilarity, allUserIDs);

            var topPairs = TopItems.getTopUserUserSimilarities(maxToKeep, pairwise);
            this.initSimilarityMaps(topPairs.GetEnumerator());
        }
 /// <summary>
 /// Assigns consecutive indexes, starting at 0, to the IDs produced by <paramref name="idIterator"/>,
 /// in iteration order.
 /// </summary>
 /// <param name="size">size argument passed to the <c>FastByIDMap</c> constructor</param>
 /// <param name="idIterator">enumerator over the IDs to index</param>
 /// <returns>map from each ID to its index</returns>
 private static FastByIDMap<int?> createIDMapping(int size, IEnumerator<long> idIterator)
 {
     var idToIndex = new FastByIDMap<int?>(size);
     for (int position = 0; idIterator.MoveNext(); position++)
     {
         idToIndex.Put(idIterator.Current, position);
     }
     return idToIndex;
 }
 /// <summary>
 /// Puts two entries into a map constructed with arguments (1, 1) and expects only the
 /// second entry to still be retrievable.
 /// </summary>
 public void testGrow()
 {
     const long firstKey  = 500000L;
     const long secondKey = 47L;

     FastByIDMap<String> tiny = new FastByIDMap<String>(1, 1);
     tiny.Put(firstKey, "alpha");
     tiny.Put(secondKey, "bang");

     Assert.IsNull(tiny.Get(firstKey));
     Assert.AreEqual("bang", tiny.Get(secondKey));
 }
Exemplo n.º 32
0
        /// <summary>
        /// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences). This
        /// <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
        /// </summary>
        /// <remarks>
        /// The preference arrays inside <paramref name="userData"/> are sorted by item in place, and the
        /// per-item arrays built here are sorted by user.
        /// </remarks>
        /// <param name="userData">users to include; (see also <see cref="GenericDataModel.ToDataMap(FastByIDMap, bool)"/>)</param>
        /// <param name="timestamps">optional timestamps of preferences: user IDs are mapped to maps of item IDs to nullable <c>DateTime</c> values. Stored as given.</param>
        public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
        {
            // userData is not validated here; it is used as given.
            this.preferenceFromUsers = userData;

            FastByIDMap<IList<IPreference>> prefsForItems = new FastByIDMap<IList<IPreference>>();
            FastIDSet itemIDSet = new FastIDSet();
            float minPrefValue = float.PositiveInfinity;
            float maxPrefValue = float.NegativeInfinity;
            int currentCount = 0;

            foreach (var userEntry in preferenceFromUsers.EntrySet())
            {
                IPreferenceArray userPrefs = userEntry.Value;
                userPrefs.SortByItem();

                foreach (IPreference pref in userPrefs)
                {
                    long itemID = pref.GetItemID();
                    itemIDSet.Add(itemID);

                    // Group every preference under the item it refers to.
                    var bucket = prefsForItems.Get(itemID);
                    if (bucket == null)
                    {
                        bucket = new List<IPreference>(2);
                        prefsForItems.Put(itemID, bucket);
                    }
                    bucket.Add(pref);

                    // Track the range of preference values seen so far.
                    float value = pref.GetValue();
                    if (value > maxPrefValue)
                    {
                        maxPrefValue = value;
                    }
                    if (value < minPrefValue)
                    {
                        minPrefValue = value;
                    }
                }

                currentCount++;
                if (currentCount % 10000 == 0)
                {
                    log.Info("Processed {0} users", currentCount);
                }
            }
            log.Info("Processed {0} users", currentCount);

            setMinPreference(minPrefValue);
            setMaxPreference(maxPrefValue);

            this.itemIDs = itemIDSet.ToArray();
            itemIDSet = null; // Might help GC -- this is big
            Array.Sort(itemIDs);

            this.preferenceForItems = ToDataMap(prefsForItems, false);
            foreach (var itemEntry in preferenceForItems.EntrySet())
            {
                itemEntry.Value.SortByUser();
            }

            // Copy the user IDs into a sorted array.
            this.userIDs = new long[userData.Count()];
            int next = 0;
            foreach (var id in userData.Keys)
            {
                userIDs[next] = id;
                next++;
            }
            Array.Sort(userIDs);

            this.timestamps = timestamps;
        }
 /// <summary>
 /// Creates the recommender over <paramref name="dataModel"/>, registers a refresh callback that
 /// re-runs <c>buildAverageDiffs</c>, and then runs <c>buildAverageDiffs</c> once.
 /// </summary>
 /// <param name="dataModel">data model passed to the base class and added as a refresh dependency</param>
 public ItemAverageRecommender(IDataModel dataModel) : base(dataModel)
 {
     itemAverages = new FastByIDMap <IRunningAverage>();

     // The refresh helper re-runs buildAverageDiffs when invoked.
     refreshHelper = new RefreshHelper(() => buildAverageDiffs());
     refreshHelper.AddDependency(dataModel);

     buildAverageDiffs();
 }
Exemplo n.º 34
0
        /// <summary>
        /// Removes the entry for <paramref name="itemID"/> from the inner map stored under
        /// <paramref name="userID"/>; does nothing when there is no inner map for that user.
        /// </summary>
        /// <param name="userID">key of the inner map</param>
        /// <param name="itemID">key to remove from the inner map</param>
        /// <param name="timestamps">user ID to (item ID to timestamp) map</param>
        private static void removeTimestamp(long userID, long itemID, FastByIDMap <FastByIDMap <DateTime?> > timestamps)
        {
            FastByIDMap <DateTime?> perUser = timestamps.get(userID);
            if (perUser == null)
            {
                return;
            }
            perUser.remove(itemID);
        }
Exemplo n.º 35
0
        /// <summary>
        /// Builds this similarity from another one, evaluated over the items of <paramref name="dataModel"/>
        /// and reduced through <c>TopItems.getTopItemItemSimilarities</c> with <paramref name="maxToKeep"/>.
        /// </summary>
        /// <param name="otherSimilarity">similarity used to produce the item-item values</param>
        /// <param name="dataModel">source of the item IDs</param>
        /// <param name="maxToKeep">limit handed to <c>TopItems.getTopItemItemSimilarities</c></param>
        public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
        {
            this.similarItemIDsIndex = new FastByIDMap <FastIDSet>();
            this.similarityMaps      = new FastByIDMap <FastByIDMap <double?> >();

            long[] allItemIDs = GenericUserSimilarity.longIteratorToList(dataModel.getItemIDs());
            DataModelSimilaritiesIterator pairwise = new DataModelSimilaritiesIterator(otherSimilarity, allItemIDs);

            var topPairs = TopItems.getTopItemItemSimilarities(maxToKeep, pairwise);
            this.initSimilarityMaps(topPairs.GetEnumerator());
        }
Exemplo n.º 36
0
        /// <summary>
        /// Parses one line of the diffs file and records it.
        /// </summary>
        /// <remarks>
        /// After splitting with <c>SEPARATOR</c>, the tokens are: item ID 1, item ID 2, diff, an optional
        /// count (defaults to 1) and, optionally, the two values <c>mk</c> and <c>sk</c>. A line therefore has
        /// 3, 4 or at least 6 tokens; exactly 5 tokens would mean <c>mk</c> without <c>sk</c> and is rejected.
        /// Numbers are parsed with the invariant culture so that the file reads the same on every machine.
        /// Empty lines and lines starting with <c>COMMENT_CHAR</c> are ignored.
        /// </remarks>
        /// <param name="line">the line to parse; may be null or empty</param>
        /// <param name="averageCount">number of averages stored so far</param>
        /// <returns><paramref name="averageCount"/>, incremented by one when a new average was stored</returns>
        /// <exception cref="ArgumentException">the line has fewer than 3 tokens or exactly 5 tokens</exception>
        /// <exception cref="Exception">an average already exists for the item-item pair</exception>
        private long processLine(String line, long averageCount)
        {
            if (string.IsNullOrEmpty(line) || line[0] == COMMENT_CHAR)
            {
                return(averageCount);
            }

            String[] tokens = SEPARATOR.Split(line);

            // Reject malformed lines up front instead of failing later with an index error.
            if (tokens.Length < 3 || tokens.Length == 5)
            {
                throw new ArgumentException("Bad line: " + line);
            }

            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            long   itemID1 = long.Parse(tokens[0], invariant);
            long   itemID2 = long.Parse(tokens[1], invariant);
            double diff    = double.Parse(tokens[2], invariant);
            int    count   = tokens.Length >= 4 ? int.Parse(tokens[3], invariant) : 1;
            bool   hasMkSk = tokens.Length >= 6; // both tokens[4] and tokens[5] must be present

            // Pairs are stored with the smaller item ID as the first-level key.
            if (itemID1 > itemID2)
            {
                long temp = itemID1;
                itemID1 = itemID2;
                itemID2 = temp;
            }

            FastByIDMap <RunningAverage> level1Map = averageDiffs.get(itemID1);

            if (level1Map == null)
            {
                level1Map = new FastByIDMap <RunningAverage>();
                averageDiffs.put(itemID1, level1Map);
            }
            RunningAverage average = level1Map.get(itemID2);

            if (average != null)
            {
                throw new Exception("Duplicated line for item-item pair " + itemID1 + " / " + itemID2);
            }

            // Only store the average while the configured entry limit has not been reached.
            if (averageCount < maxEntries)
            {
                if (hasMkSk)
                {
                    double mk = Double.Parse(tokens[4], invariant);
                    double sk = Double.Parse(tokens[5], invariant);
                    average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
                }
                else
                {
                    average = new FullRunningAverage(count, diff);
                }
                level1Map.put(itemID2, average);
                averageCount++;
            }

            // Both items are recorded as recommendable even when the average itself was not stored.
            allRecommendableItemIDs.add(itemID1);
            allRecommendableItemIDs.add(itemID2);

            return(averageCount);
        }
 /// <summary>
 /// Verifies that <c>Clear</c> leaves the map empty and that a previously stored key is gone.
 /// </summary>
 public void testClear()
 {
     const long key = 500000L;

     FastByIDMap<long?> cleared = new FastByIDMap<long?>();
     cleared.Put(key, 2L);
     cleared.Clear();

     Assert.AreEqual(0, cleared.Count());
     Assert.True(cleared.IsEmpty());
     Assert.IsNull(cleared.Get(key));
 }
Exemplo n.º 38
0
 /// <summary>
 /// Rebuilds <c>longToString</c> via <c>buildMapping</c> when the reload lock can be taken;
 /// does nothing when <c>tryLock</c> fails.
 /// </summary>
 /// <exception cref="InvalidOperationException">wraps an <c>IOException</c> raised while rebuilding</exception>
 private void reload() {
   if (reloadLock.tryLock()) {
     try {
       longToString = buildMapping();
     } catch (IOException ioe) {
       // InvalidOperationException has no constructor taking only an Exception:
       // pass the message and keep the original exception as InnerException.
       throw new InvalidOperationException(ioe.Message, ioe);
     } finally {
       // Always release the lock, also when the rebuild failed.
       reloadLock.unlock();
     }
   }
 }
Exemplo n.º 39
0
        /// <summary>
        /// Adds <paramref name="value"/> to the running average stored for <paramref name="itemID"/>,
        /// first creating and storing a <c>FullRunningAverage</c> when none exists yet.
        /// </summary>
        /// <param name="itemID">key of the running average</param>
        /// <param name="value">datum to add</param>
        /// <param name="averages">map from item ID to running average; updated in place</param>
        private static void addDatumAndCreateIfNeeded(long itemID, float value, FastByIDMap <IRunningAverage> averages)
        {
            IRunningAverage running = averages.Get(itemID);
            bool missing = running == null;

            if (missing)
            {
                running = new FullRunningAverage();
                averages.Put(itemID, running);
            }

            running.AddDatum(value);
        }
 /// <summary>
 /// Checks the entry count after two inserts and after overwriting an existing key,
 /// and that the overwrite replaced the stored value.
 /// </summary>
 public void testMaxSize()
 {
     FastByIDMap<String> sized = new FastByIDMap<String>();

     sized.Put(4, "bang");
     Assert.AreEqual(1, sized.Count());

     sized.Put(47L, "bang");
     Assert.AreEqual(2, sized.Count());
     Assert.IsNull(sized.Get(500000L));

     // Overwriting an existing key must not change the count.
     sized.Put(47L, "buzz");
     Assert.AreEqual(2, sized.Count());
     Assert.AreEqual("buzz", sized.Get(47L));
 }
Exemplo n.º 41
0
  // Creates a migrator backed by dataFile and loads the initial mapping through reload().
  // Throws FileNotFoundException when dataFile does not exist or is a directory.
  // minReloadIntervalMS is stored as given.
  public FileIDMigrator(File dataFile, long minReloadIntervalMS) {
    this.longToString = new FastByIDMap<String>(100);
    this.dataFile = Preconditions.checkNotNull(dataFile);

    // Only an existing, non-directory file is accepted.
    if (!(dataFile.exists() && !dataFile.isDirectory())) {
      throw new FileNotFoundException(dataFile.toString());
    }

    log.info("Creating FileReadonlyIDMigrator for file {}", dataFile);

    this.minReloadIntervalMS = minReloadIntervalMS;
    this.reloadLock = new ReentrantLock();
    this.lastModified = dataFile.lastModified();

    // Must run last: it relies on the fields assigned above.
    reload();
  }
        /// <summary>
        /// With a single user in the delegate data, <c>GetNumUsers</c> is expected to return 1.
        /// </summary>
        public void testGetNumUsersWithDelegateUsersOnly()
        {
            long sampleUserID = 1;
            long sampleItemID = 11;

            // One preference array holding a single (user, item) entry.
            IPreferenceArray singlePref = new GenericUserPreferenceArray(1);
            singlePref.SetUserID(0, sampleUserID);
            singlePref.SetItemID(0, sampleItemID);

            var delegatePreferences = new FastByIDMap<IPreferenceArray>();
            delegatePreferences.Put(sampleUserID, singlePref);

            PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, delegatePreferences);
            int userCount = model.GetNumUsers();

            Assert.AreEqual(1, userCount);
        }
Exemplo n.º 43
0
 /// <summary>
 /// Builds a <c>GenericDataModel</c> from a matrix of optional preference values.
 /// </summary>
 /// <param name="userIDs">user ID for each row of <paramref name="prefValues"/></param>
 /// <param name="prefValues">one row per user; the column index is used as the item ID and cells without a value are skipped</param>
 /// <returns>a data model containing only the users that have at least one value</returns>
 public static IDataModel getDataModel(long[] userIDs, double?[][] prefValues)
 {
     var usersToPrefs = new FastByIDMap<IPreferenceArray>();

     for (int row = 0; row < userIDs.Length; row++)
     {
         double?[] cells = prefValues[row];
         var collected = new List<IPreference>();

         for (int column = 0; column < cells.Length; column++)
         {
             double? cell = cells[column];
             if (!cell.HasValue)
             {
                 continue;
             }
             collected.Add(new GenericPreference(userIDs[row], column, (float) cell.Value));
         }

         // Users without any preference are left out of the model.
         if (collected.Count > 0)
         {
             usersToPrefs.Put(userIDs[row], new GenericUserPreferenceArray(collected));
         }
     }

     return new GenericDataModel(usersToPrefs);
 }
Exemplo n.º 44
0
 /// <summary>
 /// Builds a <c>GenericBooleanPrefDataModel</c> from a boolean matrix.
 /// </summary>
 /// <param name="userIDs">user ID for each row of <paramref name="prefs"/></param>
 /// <param name="prefs">one row per user; the column index of every <c>true</c> cell is used as an item ID</param>
 /// <returns>a data model containing only the users that have at least one <c>true</c> cell</returns>
 public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
 {
     var usersToItems = new FastByIDMap<FastIDSet>();

     for (int row = 0; row < userIDs.Length; row++)
     {
         bool[] flags = prefs[row];
         FastIDSet liked = new FastIDSet();

         for (int column = 0; column < flags.Length; column++)
         {
             if (flags[column])
             {
                 liked.Add(column);
             }
         }

         // Users without any item are left out of the model.
         if (liked.IsEmpty())
         {
             continue;
         }
         usersToItems.Put(userIDs[row], liked);
     }

     return new GenericBooleanPrefDataModel(usersToItems);
 }
  /// <summary>
  /// Adds the preferences of <paramref name="otherUserID"/> to <paramref name="trainingUsers"/>.
  /// For the user under evaluation, preferences on the relevant items are left out first, and
  /// the user is not added at all when nothing remains.
  /// </summary>
  /// <param name="userID">the user being evaluated</param>
  /// <param name="relevantItemIDs">item IDs withheld from the evaluated user's training data</param>
  /// <param name="trainingUsers">training data being built; updated in place</param>
  /// <param name="otherUserID">the user whose preferences are processed</param>
  /// <param name="dataModel">source of the preferences</param>
  public void ProcessOtherUser(long userID,
                               FastIDSet relevantItemIDs,
                               FastByIDMap<IPreferenceArray> trainingUsers,
                               long otherUserID,
                               IDataModel dataModel) {
    IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(otherUserID);

    // Any other user contributes all of their preferences unchanged.
    if (userID != otherUserID) {
      trainingUsers.Put(otherUserID, allPrefs);
      return;
    }

    // The evaluated user: keep only preferences that are not among the "relevant" test items.
    List<IPreference> kept = new List<IPreference>(allPrefs.Length());
    foreach (IPreference candidate in allPrefs) {
      bool withheld = relevantItemIDs.Contains(candidate.GetItemID());
      if (!withheld) {
        kept.Add(candidate);
      }
    }

    if (kept.Count > 0) {
      trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
    }
  }
   /// <summary>
   /// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users (and their preferences).
   /// This <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
   /// </summary>
   /// <param name="userData">users to include: user IDs mapped to the set of their item IDs</param>
   /// <param name="timestamps">optional timestamps of preferences: user IDs are mapped to maps of
   /// item IDs to nullable <c>DateTime</c> values. Stored as given.</param>
  public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps) {
    // userData is not validated here; it is used as given.
    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();

    FastIDSet allItemIDs = new FastIDSet();
    foreach (var userEntry in preferenceFromUsers.EntrySet()) {
      long userID = userEntry.Key;
      FastIDSet itemsOfUser = userEntry.Value;
      allItemIDs.AddAll(itemsOfUser);

      // Build the inverse index: item ID -> set of users that have the item.
      foreach (long itemID in itemsOfUser) {
        FastIDSet usersOfItem = preferenceForItems.Get(itemID);
        if (usersOfItem == null) {
          usersOfItem = new FastIDSet(2);
          preferenceForItems.Put(itemID, usersOfItem);
        }
        usersOfItem.Add(userID);
      }
    }

    this.itemIDs = allItemIDs.ToArray();
    allItemIDs = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    // Copy the user IDs into a sorted array.
    this.userIDs = new long[userData.Count()];
    int next = 0;
    foreach (long id in userData.Keys) {
      userIDs[next] = id;
      next++;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
  }
 /// <summary>
 /// Converts <paramref name="trainingData"/> with <c>GenericBooleanPrefDataModel.toDataMap</c> and wraps
 /// the result in a new <c>GenericBooleanPrefDataModel</c>.
 /// </summary>
 /// <param name="trainingData">user IDs mapped to their preference arrays</param>
 /// <returns>the boolean-preference data model built from the converted data</returns>
 public IDataModel BuildDataModel(FastByIDMap<IPreferenceArray> trainingData)
 {
     FastByIDMap<FastIDSet> booleanData = GenericBooleanPrefDataModel.toDataMap(trainingData);
     return new GenericBooleanPrefDataModel(booleanData);
 }
  /// <summary>
  /// Converts per-user preference arrays into per-user sets of item IDs.
  /// </summary>
  /// <param name="data">user IDs mapped to their preference arrays</param>
  /// <returns>a new map with the same keys, each mapped to the item IDs of its preference array</returns>
  public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data) {
    var converted = new FastByIDMap<FastIDSet>(data.Count());

    foreach (var userEntry in data.EntrySet()) {
      IPreferenceArray userPrefs = userEntry.Value;
      int count = userPrefs.Length();

      FastIDSet ids = new FastIDSet(count);
      int position = 0;
      while (position < count) {
        ids.Add(userPrefs.GetItemID(position));
        position++;
      }

      converted.Put(userEntry.Key, ids);
    }

    return converted;
  }
	/// <summary>
	/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users (and their preferences),
	/// without timestamps: delegates to the two-argument constructor with <c>null</c> timestamps.
	/// This <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
	/// </summary>
	/// <param name="userData">users to include: user IDs mapped to the set of their item IDs</param>
	public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData)
		: this(userData, null)
	{
	}
Exemplo n.º 50
0
        /// <summary>
        /// Builds a four-user toy data model and an <c>ALSWRFactorizer</c> over it.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            var user1Prefs = new List<IPreference>();
            user1Prefs.Add(new GenericPreference(1L, 1L, 5.0f));
            user1Prefs.Add(new GenericPreference(1L, 2L, 5.0f));
            user1Prefs.Add(new GenericPreference(1L, 3L, 2.0f));

            var user2Prefs = new List<IPreference>();
            user2Prefs.Add(new GenericPreference(2L, 1L, 2.0f));
            user2Prefs.Add(new GenericPreference(2L, 3L, 3.0f));
            user2Prefs.Add(new GenericPreference(2L, 4L, 5.0f));

            var user3Prefs = new List<IPreference>();
            user3Prefs.Add(new GenericPreference(3L, 2L, 5.0f));
            user3Prefs.Add(new GenericPreference(3L, 4L, 3.0f));

            var user4Prefs = new List<IPreference>();
            user4Prefs.Add(new GenericPreference(4L, 1L, 3.0f));
            user4Prefs.Add(new GenericPreference(4L, 4L, 5.0f));

            FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
            userData.Put(1L, new GenericUserPreferenceArray(user1Prefs));
            userData.Put(2L, new GenericUserPreferenceArray(user2Prefs));
            userData.Put(3L, new GenericUserPreferenceArray(user3Prefs));
            userData.Put(4L, new GenericUserPreferenceArray(user4Prefs));

            dataModel = new GenericDataModel(userData);
            factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
        }
Exemplo n.º 51
0
 /// <summary>
 /// Reads every line of <c>dataFile</c> and maps the value of <c>tolongID(line)</c> to the line itself,
 /// then records the file's current <c>lastModified()</c> value in <c>lastModified</c>.
 /// </summary>
 /// <returns>a newly built map from long ID to the original line</returns>
 private FastByIDMap<String> buildMapping() {
   FastByIDMap<String> mapping = new FastByIDMap<String>();
   // C# iterates with "foreach ... in"; the Java-style "for (x : y)" form does not compile.
   foreach (String line in new FileLineIterable(dataFile)) {
     mapping.put(tolongID(line), line);
   }
   lastModified = dataFile.lastModified();
   return mapping;
 }
 /// <summary>
 /// Creates a map with three fixed entries: 500000 -> "alpha", 47 -> "bang", 2 -> "beta".
 /// </summary>
 /// <returns>the populated map</returns>
 private static FastByIDMap<String> buildTestFastMap()
 {
     var fixture = new FastByIDMap<String>();

     fixture.Put(500000L, "alpha");
     fixture.Put(47L, "bang");
     fixture.Put(2L, "beta");

     return fixture;
 }
  /// <summary>
  /// Splits the preferences of a random sample of users into training and test data, builds a
  /// recommender on the training data and evaluates it against the test data.
  /// </summary>
  /// <remarks>Arguments are not validated here.</remarks>
  /// <param name="recommenderBuilder">builds the recommender from the training model</param>
  /// <param name="dataModelBuilder">builds the training model; when null a <c>GenericDataModel</c> is used</param>
  /// <param name="dataModel">source of users and preferences</param>
  /// <param name="trainingPercentage">passed to <c>splitOneUsersPrefs</c> for each selected user</param>
  /// <param name="evaluationPercentage">a user is selected when <c>random.nextDouble()</c> is below this value</param>
  /// <returns>the value returned by <c>getEvaluation</c></returns>
  public virtual double Evaluate(IRecommenderBuilder recommenderBuilder,
                         IDataModelBuilder dataModelBuilder,
                         IDataModel dataModel,
                         double trainingPercentage,
                         double evaluationPercentage) {
    log.Info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);

    int numUsers = dataModel.GetNumUsers();
    int initialCapacity = 1 + (int) (evaluationPercentage * numUsers);
    var trainingPrefs = new FastByIDMap<IPreferenceArray>(initialCapacity);
    var testPrefs = new FastByIDMap<IPreferenceArray>(initialCapacity);

    var userIterator = dataModel.GetUserIDs();
    while (userIterator.MoveNext()) {
      long userID = userIterator.Current;
      bool selected = random.nextDouble() < evaluationPercentage;
      if (!selected) {
        continue;
      }
      splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
    }

    IDataModel trainingModel;
    if (dataModelBuilder == null) {
      trainingModel = new GenericDataModel(trainingPrefs);
    } else {
      trainingModel = dataModelBuilder.BuildDataModel(trainingPrefs);
    }

    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

    double score = getEvaluation(testPrefs, recommender);
    log.Info("Evaluation result: {}", score);
    return score;
  }
  /// <summary>
  /// Creates one action per test user that estimates each of the user's test preferences with
  /// <paramref name="recommender"/>, runs the actions through <c>execute</c> and returns
  /// <c>computeFinalEvaluation()</c>.
  /// </summary>
  /// <param name="testPrefs">user IDs mapped to the preferences withheld for testing</param>
  /// <param name="recommender">recommender whose estimates are evaluated</param>
  /// <returns>the value of <c>computeFinalEvaluation()</c></returns>
  private double getEvaluation(FastByIDMap<IPreferenceArray> testPrefs, IRecommender recommender)
  {
    reset();

    AtomicInteger noEstimateCounter = new AtomicInteger();
    var estimateCallables = new List<Action>();

    foreach (var entry in testPrefs.EntrySet()) {
      Action estimateForUser = () => {
        var testUserID = entry.Key;
        var userPrefs = entry.Value;

        foreach (IPreference actual in userPrefs) {
          float estimate = float.NaN;
          try {
            estimate = recommender.EstimatePreference(testUserID, actual.GetItemID());
          } catch (NoSuchUserException) {
            // A user can exist in the test data but not in the training data;
            // just log it and move on.
            log.Info("User exists in test data but not training data: {}", testUserID);
          } catch (NoSuchItemException) {
            log.Info("Item exists in test data but not training data: {}", actual.GetItemID());
          }

          // A NaN estimate (including the two exception cases above) counts as "no estimate".
          if (!float.IsNaN(estimate)) {
            estimate = capEstimatedPreference(estimate);
            processOneEstimate(estimate, actual);
          } else {
            noEstimateCounter.incrementAndGet();
          }
        }
      };
      estimateCallables.Add(estimateForUser);
    }

    log.Info("Beginning evaluation of {} users", estimateCallables.Count);
    IRunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
    execute(estimateCallables, noEstimateCounter, timing);
    return computeFinalEvaluation();
  }
 /// <summary>
 /// Randomly assigns each preference of one user to either the training or the test data.
 /// The user is added to <paramref name="testPrefs"/> only when they also received at least one
 /// training preference.
 /// </summary>
 /// <param name="trainingPercentage">a preference goes to training when <c>random.nextDouble()</c> is below this value</param>
 /// <param name="trainingPrefs">training data; updated in place</param>
 /// <param name="testPrefs">test data; updated in place</param>
 /// <param name="userID">the user whose preferences are split</param>
 /// <param name="dataModel">source of the user's preferences</param>
 private void splitOneUsersPrefs(double trainingPercentage,
                                 FastByIDMap<IPreferenceArray> trainingPrefs,
                                 FastByIDMap<IPreferenceArray> testPrefs,
                                 long userID,
                                 IDataModel dataModel) {
   // Both lists stay null until the first preference lands in them.
   List<IPreference> forTraining = null;
   List<IPreference> forTest = null;

   IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(userID);
   int total = allPrefs.Length();
   for (int index = 0; index < total; index++) {
     IPreference copy = new GenericPreference(userID, allPrefs.GetItemID(index), allPrefs.GetValue(index));
     bool goesToTraining = random.nextDouble() < trainingPercentage;
     if (goesToTraining) {
       forTraining = forTraining ?? new List<IPreference>(3);
       forTraining.Add(copy);
     } else {
       forTest = forTest ?? new List<IPreference>(3);
       forTest.Add(copy);
     }
   }

   if (forTraining == null) {
     // Without training data for this user nothing is recorded, not even test preferences.
     return;
   }

   trainingPrefs.Put(userID, new GenericUserPreferenceArray(forTraining));
   if (forTest != null) {
     testPrefs.Put(userID, new GenericUserPreferenceArray(forTest));
   }
 }
 /// <summary>
 /// Checks <c>Count</c> and <c>IsEmpty</c> on a new map, after one insert, and after removing that entry.
 /// </summary>
 public void testSizeEmpty()
 {
     const long key = 500000L;
     FastByIDMap<long> counted = new FastByIDMap<long>();

     // Freshly created: empty.
     Assert.AreEqual(0, counted.Count());
     Assert.True(counted.IsEmpty());

     // One entry.
     counted.Put(key, 2L);
     Assert.AreEqual(1, counted.Count());
     Assert.False(counted.IsEmpty());

     // Empty again after removal.
     counted.Remove(key);
     Assert.AreEqual(0, counted.Count());
     Assert.True(counted.IsEmpty());
 }
  /// <summary>
  /// Computes information-retrieval statistics (precision, recall, fall-out, nDCG and reach) for
  /// recommenders built by <paramref name="recommenderBuilder"/>. For every sampled user the "relevant"
  /// items are determined, a training model is assembled through <c>dataSplitter.ProcessOtherUser</c>,
  /// a recommender is built on it, and its top <paramref name="at"/> recommendations are compared
  /// with the relevant items.
  /// </summary>
  /// <param name="recommenderBuilder">builds a recommender from each training model</param>
  /// <param name="dataModelBuilder">builds the training model; when null a <c>GenericDataModel</c> is used</param>
  /// <param name="dataModel">source of users, items and preferences</param>
  /// <param name="rescorer">passed through to <c>Recommend</c></param>
  /// <param name="at">number of recommendations requested per user</param>
  /// <param name="relevanceThreshold">threshold handed to the data splitter; when NaN, <c>computeThreshold(prefs)</c> is used instead</param>
  /// <param name="evaluationPercentage">a user is skipped when <c>random.nextDouble()</c> is greater than or equal to this value</param>
  /// <returns>an <c>IRStatisticsImpl</c> holding the averages of the collected metrics and the reach ratio</returns>
  public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                               IDataModelBuilder dataModelBuilder,
                               IDataModel dataModel,
                               IDRescorer rescorer,
                               int at,
                               double relevanceThreshold,
                               double evaluationPercentage) {

    // Argument validation is currently disabled (kept below for reference).
    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    var it = dataModel.GetUserIDs();
    while (it.MoveNext()) {

      long userID = it.Current;

      if (random.nextDouble() >= evaluationPercentage) {
        // Skipped
        continue;
      }

	  // Times the work done for this user; only read when the user is fully evaluated.
	  var stopWatch = new System.Diagnostics.Stopwatch();
	  stopWatch.Start();

      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

      // List some most-preferred items that would count as (most) "relevant" results
      double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
      FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

      int numRelevantItems = relevantItemIDs.Count();
      if (numRelevantItems <= 0) {
        continue;
      }

      // Build the training data for this user from every user in the model.
      FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
      var it2 = dataModel.GetUserIDs();
      while (it2.MoveNext()) {
        dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
      }

      IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
      // Probe whether the user still exists in the training model.
      try {
        trainingModel.GetPreferencesFromUser(userID);
      } catch (NoSuchUserException nsee) {
        continue; // Oops we excluded all prefs for the user -- just move on
      }

      int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
      if (size < 2 * at) {
        // Really not enough prefs to meaningfully evaluate this user
        continue;
      }

      IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

      // Count how many recommended items are among the relevant ones.
      int intersectionSize = 0;
      var recommendedItems = recommender.Recommend(userID, at, rescorer);
      foreach (IRecommendedItem recommendedItem in recommendedItems) {
        if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
          intersectionSize++;
        }
      }

      int numRecommendedItems = recommendedItems.Count;

      // Precision
      if (numRecommendedItems > 0) {
        precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
      }

      // Recall
      recall.AddDatum((double) intersectionSize / (double) numRelevantItems);

      // Fall-out
      if (numRelevantItems < size) {
        fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                         / (double) (numItems - numRelevantItems));
      }

      // nDCG
      // In computing, assume relevant IDs have relevance 1 and others 0
      double cumulativeGain = 0.0;
      double idealizedGain = 0.0;
      for (int i = 0; i < numRecommendedItems; i++) {
        IRecommendedItem item = recommendedItems[i];
        double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
        if (relevantItemIDs.Contains(item.GetItemID())) {
          cumulativeGain += discount;
        }
        // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

        // Ideally results would be ordered with all relevant ones first, so this theoretical
        // ideal list starts with number of relevant items equal to the total number of relevant items
        if (i < numRelevantItems) {
          idealizedGain += discount;
        }
      }
      if (idealizedGain > 0.0) {
        nDCG.AddDatum(cumulativeGain / idealizedGain);
      }

      // Reach
      numUsersRecommendedFor++;
      if (numRecommendedItems > 0) {
        numUsersWithRecommendations++;
      }

	  stopWatch.Stop();

      // NOTE(review): these messages use "{}" placeholders -- confirm that log.Info supports that style.
      log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
      log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
               precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
               (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
    }

    // When no user was evaluated the reach ratio below is 0.0 / 0.0, i.e. NaN.
    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }
 /// <summary>
 /// A key is absent before <c>Put</c> and returns the stored value afterwards.
 /// </summary>
 public void testPutAndGet()
 {
     const long key = 500000L;
     FastByIDMap<long?> store = new FastByIDMap<long?>();

     Assert.IsNull(store.Get(key));

     store.Put(key, 2L);
     Assert.AreEqual(2L, (long) store.Get(key));
 }
Exemplo n.º 59
0
 // Creates the migrator with an empty long-to-string map; 100 is the size argument
 // passed to the FastByIDMap constructor.
 public MemoryIDMigrator()
 {
   longToString = new FastByIDMap<String>(100);
 }
 /// <summary>
 /// Runs one million random get/put/remove operations (keys 0..99) against both a <c>FastByIDMap</c>
 /// and a reference <c>Dictionary</c>, asserting that returned values, counts and emptiness agree.
 /// </summary>
 public void testVersusHashMap()
 {
     FastByIDMap<String> actual = new FastByIDMap<String>();
     IDictionary<long, string> reference = new Dictionary<long,string>(1000000);
     var random = RandomUtils.getRandom();

     for (int round = 0; round < 1000000; round++)
     {
         double choice = random.nextDouble();
         long key = (long) random.nextInt(100);

         // Value the reference holds before this operation; null when the key is absent.
         string previous;
         reference.TryGetValue(key, out previous);

         if (choice < 0.4)
         {
             // Read-only operation: no size checks afterwards.
             Assert.AreEqual(previous, actual.Get(key));
             continue;
         }

         if (choice < 0.7)
         {
             reference[key] = "bang";
             Assert.AreEqual(previous, actual.Put(key, "bang"));
         }
         else
         {
             reference.Remove(key);
             Assert.AreEqual(previous, actual.Remove(key));
         }

         Assert.AreEqual(reference.Count, actual.Count());
         Assert.AreEqual(reference.Count==0, actual.IsEmpty());
     }
 }