Exemplo n.º 1
0
        /// <summary>
        /// Computes the similarity of two users as the number of items both users have
        /// divided by the number of items that either user has.
        /// </summary>
        /// <param name="userID1">ID of the first user.</param>
        /// <param name="userID2">ID of the second user.</param>
        /// <returns>
        /// <c>double.NaN</c> when both users have no items or when they share no item,
        /// <c>0.0</c> when exactly one of them has no items, otherwise the ratio
        /// intersection size / union size.
        /// </returns>
        public double userSimilarity(long userID1, long userID2)
        {
            DataModel dataModel = base.getDataModel();
            FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
            FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
            int firstCount = firstItems.size();
            int secondCount = secondItems.size();

            bool firstEmpty = firstCount == 0;
            bool secondEmpty = secondCount == 0;
            if (firstEmpty || secondEmpty)
            {
                // Undefined when neither user has items, zero when only one of them has none.
                return (firstEmpty && secondEmpty) ? double.NaN : 0.0;
            }

            // The larger set is always the receiver of intersectionSize.
            int shared;
            if (firstCount < secondCount)
            {
                shared = secondItems.intersectionSize(firstItems);
            }
            else
            {
                shared = firstItems.intersectionSize(secondItems);
            }

            if (shared == 0)
            {
                return double.NaN;
            }

            int unionCount = (firstCount + secondCount) - shared;
            return shared / (double)unionCount;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Adds the preferences of <paramref name="otherUserID"/> to <paramref name="trainingUsers"/>.
        /// When that user is the user under evaluation, preferences for the relevant item IDs are left out.
        /// </summary>
        /// <param name="userID">ID of the user being evaluated.</param>
        /// <param name="relevantItemIDs">Item IDs to exclude from the evaluated user's training preferences.</param>
        /// <param name="trainingUsers">Map that receives the training preferences, keyed by user ID.</param>
        /// <param name="otherUserID">ID of the user whose preferences are being processed.</param>
        /// <param name="dataModel">Model the preferences are read from.</param>
        public void ProcessOtherUser(long userID,
                                     FastIDSet relevantItemIDs,
                                     FastByIDMap <IPreferenceArray> trainingUsers,
                                     long otherUserID,
                                     IDataModel dataModel)
        {
            IPreferenceArray otherPrefs = dataModel.GetPreferencesFromUser(otherUserID);

            if (userID != otherUserID)
            {
                // Any other user contributes all of their preferences unchanged.
                trainingUsers.Put(otherUserID, otherPrefs);
                return;
            }

            // Evaluated user: keep only the preferences whose item is not a "relevant" (test) item.
            List <IPreference> kept = new List <IPreference>(otherPrefs.Length());
            foreach (IPreference candidate in otherPrefs)
            {
                if (relevantItemIDs.Contains(candidate.GetItemID()))
                {
                    continue;
                }
                kept.Add(candidate);
            }

            // Nothing is stored for the user when every preference was filtered out.
            if (kept.Count > 0)
            {
                trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
            }
        }
        /// <summary>
        /// For every user of the recommender's data model, compares the recommender's output with the
        /// user's preferences from <paramref name="model"/> (sorted via <c>SortByValueReversed</c>) and
        /// feeds the value returned by <c>scoreCommonSubset</c> into <paramref name="tracker"/>.
        /// Users for which fewer than two common items remain are skipped.
        /// </summary>
        /// <param name="recommender">Recommender under evaluation.</param>
        /// <param name="model">Model supplying the item count and the users' preferences.</param>
        /// <param name="samples">Upper bound passed to the helpers and applied to the common item count.</param>
        /// <param name="tracker">Receives one datum per scored user.</param>
        /// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
        public static void Evaluate(IRecommender recommender,
                                    IDataModel model,
                                    int samples,
                                    IRunningAverage tracker,
                                    String tag)
        {
            printHeader();
            var userIDs = recommender.GetDataModel().GetUserIDs();

            while (users_MoveNextGuard(userIDs))
            {
                long userID = userIDs.Current;
                var recommended = recommender.Recommend(userID, model.GetNumItems());
                IPreferenceArray actualPrefs = model.GetPreferencesFromUser(userID);
                actualPrefs.SortByValueReversed();

                FastIDSet commonSet = new FastIDSet();
                long maxItemID = setBits(commonSet, recommended, samples);
                FastIDSet prefSet = new FastIDSet();
                maxItemID = Math.Max(maxItemID, setBits(prefSet, actualPrefs, samples));

                int commonCount = Math.Min(mask(commonSet, prefSet, maxItemID), samples);
                if (commonCount >= 2)
                {
                    long[] recommendedItems = getCommonItems(commonSet, recommended, commonCount);
                    long[] preferredItems = getCommonItems(commonSet, actualPrefs, commonCount);
                    tracker.AddDatum(scoreCommonSubset(tag, userID, samples, commonCount, recommendedItems, preferredItems));
                }
            }
        }

        // Advances the user ID enumerator; kept separate only to name the loop condition.
        private static bool users_MoveNextGuard(System.Collections.IEnumerator userIDs)
        {
            return userIDs.MoveNext();
        }
Exemplo n.º 4
0
 /// <summary>
 /// Checks that a value is not reported as contained in a new set and is reported once added.
 /// </summary>
 public void testContainsAndAdd()
 {
     FastIDSet ids = new FastIDSet();

     // A freshly created set must not contain the value.
     Assert.False(ids.Contains(1));

     ids.Add(1);

     Assert.True(ids.Contains(1));
 }
        /// <summary>
        /// Returns the smallest user-user similarity found between users of <paramref name="cluster1"/>
        /// (possibly sampled according to <c>samplingRate</c>) and all users of <paramref name="cluster2"/>.
        /// </summary>
        /// <param name="cluster1">First cluster of user IDs.</param>
        /// <param name="cluster2">Second cluster of user IDs.</param>
        /// <returns>
        /// <c>Double.NaN</c> if either cluster is empty; otherwise the least similarity seen. If no
        /// comparison lowered the initial value, the similarity of the first user of each cluster is
        /// returned instead.
        /// </returns>
        public double getSimilarity(FastIDSet cluster1, FastIDSet cluster2)
        {
            if (cluster1.isEmpty() || cluster2.isEmpty())
            {
                return(Double.NaN);
            }
            double leastSimilarity = Double.PositiveInfinity;
            var    someUsers       = SamplingLongPrimitiveIterator.maybeWrapIterator(cluster1.GetEnumerator(), samplingRate);

            while (someUsers.MoveNext())
            {
                long userID1 = someUsers.Current;
                var  it2     = cluster2.GetEnumerator();
                while (it2.MoveNext())
                {
                    // A NaN similarity never compares as smaller, so it is ignored here.
                    double theSimilarity = similarity.userSimilarity(userID1, it2.Current);
                    if (theSimilarity < leastSimilarity)
                    {
                        leastSimilarity = theSimilarity;
                    }
                }
            }
            // We skipped everything? well, at least try comparing the first Users to get some value
            if (leastSimilarity == Double.PositiveInfinity)
            {
                // An enumerator starts positioned before the first element, so it has to be advanced
                // before Current is read; reading Current straight away yields an undefined value.
                var first1 = cluster1.GetEnumerator();
                var first2 = cluster2.GetEnumerator();
                if (!first1.MoveNext() || !first2.MoveNext())
                {
                    // Not expected because both clusters were checked to be non-empty above.
                    return(Double.NaN);
                }
                return(similarity.userSimilarity(first1.Current, first2.Current));
            }
            return(leastSimilarity);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds the model from per-user preference arrays: indexes the preferences by item, records
        /// the minimum and maximum preference value, and stores sorted arrays of all item IDs and user IDs.
        /// </summary>
        /// <param name="userData">Preference arrays keyed by user ID; each array is sorted by item in place.</param>
        /// <param name="timestamps">Timestamps keyed by user ID and then item ID; stored as given.</param>
        public GenericDataModel(FastByIDMap <PreferenceArray> userData, FastByIDMap <FastByIDMap <DateTime?> > timestamps)
        {
            this.preferenceFromUsers = userData;

            FastByIDMap <List <Preference> > prefsByItem = new FastByIDMap <List <Preference> >();
            FastIDSet seenItemIDs    = new FastIDSet();
            int       usersProcessed = 0;
            float     maxValue       = float.NegativeInfinity;
            float     minValue       = float.PositiveInfinity;

            foreach (KeyValuePair <long, PreferenceArray> userEntry in this.preferenceFromUsers.entrySet())
            {
                PreferenceArray userPrefs = userEntry.Value;
                userPrefs.sortByItem();

                foreach (Preference pref in userPrefs)
                {
                    long itemID = pref.getItemID();
                    seenItemIDs.add(itemID);

                    // Create the per-item list the first time an item is seen.
                    List <Preference> itemPrefs = prefsByItem.get(itemID);
                    if (itemPrefs == null)
                    {
                        itemPrefs = new List <Preference>(2);
                        prefsByItem.put(itemID, itemPrefs);
                    }
                    itemPrefs.Add(pref);

                    // Track the range of preference values seen so far.
                    float prefValue = pref.getValue();
                    if (prefValue > maxValue)
                    {
                        maxValue = prefValue;
                    }
                    if (prefValue < minValue)
                    {
                        minValue = prefValue;
                    }
                }

                // Progress message every 10000 users.
                usersProcessed++;
                if ((usersProcessed % 10000) == 0)
                {
                    log.info("Processed {0} users", new object[] { usersProcessed });
                }
            }
            log.info("Processed {0} users", new object[] { usersProcessed });

            this.setMinPreference(minValue);
            this.setMaxPreference(maxValue);

            this.itemIDs = seenItemIDs.toArray();
            seenItemIDs  = null; // the set is no longer needed once copied to the array
            Array.Sort <long>(this.itemIDs);

            this.preferenceForItems = toDataMap(prefsByItem, false);
            foreach (KeyValuePair <long, PreferenceArray> itemEntry in this.preferenceForItems.entrySet())
            {
                itemEntry.Value.sortByUser();
            }

            this.userIDs = new long[userData.size()];
            int next = 0;
            foreach (long userID in userData.Keys)
            {
                this.userIDs[next] = userID;
                next++;
            }
            Array.Sort <long>(this.userIDs);

            this.timestamps = timestamps;
        }
        /// <summary>
        /// For every user of the recommender's data model, compares the recommended items with the
        /// user's preferences from <paramref name="model"/> (sorted via <c>sortByValueReversed</c>) and
        /// adds the value returned by <c>scoreCommonSubset</c> to <paramref name="tracker"/>.
        /// Users with fewer than two common items are skipped.
        /// </summary>
        /// <param name="recommender">Recommender under evaluation.</param>
        /// <param name="model">Model supplying the item count and the users' preferences.</param>
        /// <param name="samples">Upper bound passed to the helpers and applied to the common item count.</param>
        /// <param name="tracker">Receives one datum per scored user.</param>
        /// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
        public static void evaluate(Recommender recommender, DataModel model, int samples, RunningAverage tracker, string tag)
        {
            printHeader();
            IEnumerator <long> userIDs = recommender.getDataModel().getUserIDs();

            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                List <RecommendedItem> recommended = recommender.recommend(userID, model.getNumItems());
                PreferenceArray        actual      = model.getPreferencesFromUser(userID);
                actual.sortByValueReversed();

                FastIDSet recommendedSet = new FastIDSet();
                long      maxItemID      = setBits(recommendedSet, recommended, samples);
                FastIDSet actualSet      = new FastIDSet();
                maxItemID = Math.Max(maxItemID, setBits(actualSet, actual, samples));

                // Never consider more common items than the requested sample size.
                int commonCount = mask(recommendedSet, actualSet, maxItemID);
                if (samples < commonCount)
                {
                    commonCount = samples;
                }
                if (commonCount < 2)
                {
                    continue;
                }

                long[] fromRecommender = getCommonItems(recommendedSet, recommended, commonCount);
                long[] fromPreferences = getCommonItems(recommendedSet, actual, commonCount);
                tracker.addDatum(scoreCommonSubset(tag, userID, samples, commonCount, fromRecommender, fromPreferences));
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Scans pairs of clusters and returns the pair with the highest non-NaN similarity.
        /// When <c>samplingRate</c> is below 1.0, each pair is only examined with that probability.
        /// </summary>
        /// <param name="clusters">Clusters to compare pairwise.</param>
        /// <returns>
        /// The most similar pair found, or a default <c>KeyValuePair</c> (null key and value)
        /// when no pair produced a usable similarity.
        /// </returns>
        private KeyValuePair <FastIDSet, FastIDSet> findNearestClusters(List <FastIDSet> clusters)
        {
            KeyValuePair <FastIDSet, FastIDSet> best = new KeyValuePair <FastIDSet, FastIDSet>();
            double highest = Double.NegativeInfinity;
            int    count   = clusters.Count;

            for (int first = 0; first < count; first++)
            {
                FastIDSet left = clusters[first];
                for (int second = first + 1; second < count; second++)
                {
                    // The random draw only happens when the sampling rate is below 1.0.
                    bool examine = samplingRate >= 1.0 || random.nextDouble() < samplingRate;
                    if (!examine)
                    {
                        continue;
                    }

                    FastIDSet right     = clusters[second];
                    double    candidate = clusterSimilarity.getSimilarity(left, right);
                    if (Double.IsNaN(candidate))
                    {
                        continue;
                    }
                    if (candidate > highest)
                    {
                        highest = candidate;
                        best    = new KeyValuePair <FastIDSet, FastIDSet>(left, right);
                    }
                }
            }
            return best;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Rebuilds <c>topRecsByUserID</c>, <c>clustersByUserID</c> and <c>allClusters</c> from the
        /// current data model. Starts with one cluster per user and, when there is more than one
        /// user, lets <c>findClusters</c> process that list.
        /// </summary>
        private void buildClusters()
        {
            DataModel dataModel = getDataModel();
            int       userCount = dataModel.getNumUsers();

            if (userCount <= 0)
            {
                // No users: publish empty structures.
                topRecsByUserID  = new FastByIDMap <List <RecommendedItem> >();
                clustersByUserID = new FastByIDMap <FastIDSet>();
                allClusters      = NO_CLUSTERS;
                return;
            }

            // Begin with a singleton cluster for each user.
            List <FastIDSet> clusters = new List <FastIDSet>();
            var userIDs = dataModel.getUserIDs();
            while (userIDs.MoveNext())
            {
                FastIDSet singleton = new FastIDSet();
                singleton.add(userIDs.Current);
                clusters.Add(singleton);
            }

            if (userCount > 1)
            {
                findClusters(clusters);
            }

            topRecsByUserID  = computeTopRecsPerUserID(clusters);
            clustersByUserID = computeClustersPerUserID(clusters);
            allClusters      = clusters.ToArray();
        }
        /// <summary>
        /// Computes the similarity of two users as the number of items both users have
        /// divided by the number of items that either user has.
        /// </summary>
        /// <param name="userID1">ID of the first user.</param>
        /// <param name="userID2">ID of the second user.</param>
        /// <returns>
        /// <c>Double.NaN</c> when both users have no items or when they share no item,
        /// <c>0.0</c> when exactly one of them has no items, otherwise intersection size / union size.
        /// </returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model      = getDataModel();
            FastIDSet  firstItems = model.GetItemIDsFromUser(userID1);
            FastIDSet  otherItems = model.GetItemIDsFromUser(userID2);

            int firstCount = firstItems.Count();
            int otherCount = otherItems.Count();

            bool firstEmpty = firstCount == 0;
            bool otherEmpty = otherCount == 0;
            if (firstEmpty || otherEmpty)
            {
                // Undefined when neither user has items, zero when only one of them has none.
                return (firstEmpty && otherEmpty) ? Double.NaN : 0.0;
            }

            // The larger set is always the receiver of IntersectionSize.
            int shared;
            if (firstCount < otherCount)
            {
                shared = otherItems.IntersectionSize(firstItems);
            }
            else
            {
                shared = firstItems.IntersectionSize(otherItems);
            }

            if (shared == 0)
            {
                return Double.NaN;
            }

            int unionCount = firstCount + otherCount - shared;
            return shared / (double)unionCount;
        }
        /// <summary>
        /// Checks that <c>AllUnknownItemsCandidateItemsStrategy</c> returns items 1 and 3 for a user
        /// whose only preference is item 2, given a mocked data model that exposes items 1, 2 and 3.
        /// </summary>
        public void testStrategy()
        {
            FastIDSet allItemIDs = new FastIDSet();

            allItemIDs.AddAll(new long[] { 1L, 2L, 3L });

            // The mock expects exactly these two calls and answers with the values below.
            var dataModelMock = new DynamicMock(typeof(IDataModel));

            dataModelMock.ExpectAndReturn("GetNumItems", 3);
            dataModelMock.ExpectAndReturn("GetItemIDs", allItemIDs.GetEnumerator());

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(new List <IPreference>()
            {
                new GenericPreference(123L, 2L, 1.0f)
            });

            ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)dataModelMock.MockInstance);

            Assert.AreEqual(2, candidateItems.Count());
            Assert.True(candidateItems.Contains(1L));
            Assert.True(candidateItems.Contains(3L));

            dataModelMock.Verify();
        }
Exemplo n.º 12
0
        /// <summary>
        /// Returns the cluster recorded for the given user after calling <c>buildClusters</c>.
        /// </summary>
        /// <param name="userID">ID of the user whose cluster is requested.</param>
        /// <returns>The user's cluster, or a new empty <c>FastIDSet</c> when none is recorded.</returns>
        public FastIDSet getCluster(long userID)
        {
            buildClusters();

            FastIDSet found = clustersByUserID.get(userID);
            if (found == null)
            {
                return new FastIDSet();
            }
            return found;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Rebuilds <c>topRecsByUserID</c>, <c>clustersByUserID</c> and <c>allClusters</c> from the
        /// current data model. Starts with one cluster per user and repeatedly calls
        /// <c>mergeClosestClusters</c> until it reports completion.
        /// </summary>
        private void buildClusters()
        {
            DataModel model    = getDataModel();
            int       numUsers = model.getNumUsers();

            if (numUsers == 0)
            {
                topRecsByUserID  = new FastByIDMap <List <RecommendedItem> >();
                clustersByUserID = new FastByIDMap <FastIDSet>();
                // Reset the cluster array as well, so it cannot keep a value from an earlier build
                // (or stay unassigned) while the two maps above are empty.
                allClusters      = new FastIDSet[0];
            }
            else
            {
                List <FastIDSet> clusters = new List <FastIDSet>();
                // Begin with a cluster for each user:
                var it = model.getUserIDs();
                while (it.MoveNext())
                {
                    FastIDSet newCluster = new FastIDSet();
                    newCluster.add(it.Current);
                    clusters.Add(newCluster);
                }

                // Keep merging until mergeClosestClusters signals that it is done.
                bool done = false;
                while (!done)
                {
                    done = mergeClosestClusters(numUsers, clusters, done);
                }

                topRecsByUserID  = computeTopRecsPerUserID(clusters);
                clustersByUserID = computeClustersPerUserID(clusters);
                allClusters      = clusters.ToArray();
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Computes the similarity of every pair of clusters, collects pairs with a non-NaN
        /// similarity in a priority queue, and returns the queue contents as a sorted list.
        /// </summary>
        /// <param name="numUsers">Queue size threshold; the queue is created with capacity numUsers + 1.</param>
        /// <param name="clusters">Clusters to compare pairwise.</param>
        /// <returns>The collected pairs, sorted with <c>List.Sort()</c>.</returns>
        private List <ClusterClusterPair> findClosestClusters(int numUsers, List <FastIDSet> clusters)
        {
            PriorityQueue <ClusterClusterPair> queue = new PriorityQueue <ClusterClusterPair>(numUsers + 1, new ClusterClusterPair());
            int clusterCount = clusters.Count;

            for (int first = 0; first < clusterCount; first++)
            {
                FastIDSet left = clusters[first];
                for (int second = first + 1; second < clusterCount; second++)
                {
                    FastIDSet right         = clusters[second];
                    double    pairSimilarity = clusterSimilarity.getSimilarity(left, right);
                    if (Double.IsNaN(pairSimilarity))
                    {
                        continue;
                    }

                    if (queue.Count < numUsers)
                    {
                        queue.Push(new ClusterClusterPair(left, right, pairSimilarity));
                        continue;
                    }

                    // NOTE(review): Pop() removes an entry even when the comparison fails, and a
                    // second entry is removed after the push. Confirm this is intended rather than
                    // a non-destructive peek at the queue head.
                    if (pairSimilarity > queue.Pop().getSimilarity())
                    {
                        queue.Push(new ClusterClusterPair(left, right, pairSimilarity));
                        queue.Pop();
                    }
                }
            }

            List <ClusterClusterPair> sorted = queue.ToList();
            sorted.Sort();
            return sorted;
        }
  /// <summary>
  /// For every user of the first recommender's data model, compares the recommendations of both
  /// recommenders and adds the value returned by <c>scoreCommonSubset</c> to <paramref name="tracker"/>.
  /// Users with fewer than two common items are skipped.
  /// </summary>
  /// <param name="recommender1">First recommender; also supplies the user IDs.</param>
  /// <param name="recommender2">Second recommender.</param>
  /// <param name="samples">Number of recommendations requested and upper bound on the common item count.</param>
  /// <param name="tracker">Receives one datum per scored user.</param>
  /// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
  public static void Evaluate(IRecommender recommender1,
                              IRecommender recommender2,
                              int samples,
                              IRunningAverage tracker,
                              String tag) {
    printHeader();
    var userIDs = recommender1.GetDataModel().GetUserIDs();

    while (userIDs.MoveNext()) {
      long userID = userIDs.Current;
      var firstRecs = recommender1.Recommend(userID, samples);
      var secondRecs = recommender2.Recommend(userID, samples);

      FastIDSet commonSet = new FastIDSet();
      long maxItemID = setBits(commonSet, firstRecs, samples);
      FastIDSet secondSet = new FastIDSet();
      maxItemID = Math.Max(maxItemID, setBits(secondSet, secondRecs, samples));

      // Never consider more common items than the requested sample size.
      int commonCount = Math.Min(mask(commonSet, secondSet, maxItemID), samples);
      if (commonCount >= 2) {
        long[] firstItems = getCommonItems(commonSet, firstRecs, commonCount);
        long[] secondItems = getCommonItems(commonSet, secondRecs, commonCount);
        tracker.AddDatum(scoreCommonSubset(tag, userID, samples, commonCount, firstItems, secondItems));
      }
    }
  }
        /// <summary>
        /// Checks that <c>AllUnknownItemsCandidateItemsStrategy</c> returns items 1 and 3 for a user
        /// whose only preference is item 2, given a mocked data model that exposes items 1, 2 and 3.
        /// </summary>
        public void testStrategy()
        {
            FastIDSet allItemIDs = new FastIDSet();
            allItemIDs.AddAll(new long[] { 1L, 2L, 3L });

            // The mock expects exactly these two calls and answers with the values below.
            var dataModelMock = new DynamicMock( typeof( IDataModel ));
            dataModelMock.ExpectAndReturn("GetNumItems", 3);
            dataModelMock.ExpectAndReturn("GetItemIDs", allItemIDs.GetEnumerator());

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray( new List<IPreference>() {
            new GenericPreference(123L, 2L, 1.0f) } );

            ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

            FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)dataModelMock.MockInstance);
            Assert.AreEqual(2, candidateItems.Count() );
            Assert.True(candidateItems.Contains(1L));
            Assert.True(candidateItems.Contains(3L));

            dataModelMock.Verify();
        }
        /// <summary>
        /// Checks that <c>PreferredItemsNeighborhoodCandidateItemsStrategy</c> returns only item 2 for
        /// user 123, who prefers item 1, when user 456 prefers items 1 and 2.
        /// </summary>
        public void testStrategy()
        {
            // User 123 has item 1; user 456 has items 1 and 2.
            FastIDSet itemsOfUser123 = new FastIDSet();
            itemsOfUser123.Add(1L);

            FastIDSet itemsOfUser456 = new FastIDSet();
            itemsOfUser456.Add(1L);
            itemsOfUser456.Add(2L);

            // Both users have a preference for item 1.
            List<IPreference> item1Prefs = new List<IPreference>
            {
                new GenericPreference(123L, 1L, 1.0f),
                new GenericPreference(456L, 1L, 1.0f)
            };
            IPreferenceArray prefsForItem1 = new GenericItemPreferenceArray(item1Prefs);

            var modelMock = new DynamicMock(typeof(IDataModel));
            modelMock.ExpectAndReturn("GetPreferencesForItem", prefsForItem1, 1L);
            modelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOfUser123, 123L);
            modelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOfUser456, 456L);

            List<IPreference> user123Prefs = new List<IPreference>() { new GenericPreference(123L, 1L, 1.0f) };
            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(user123Prefs);

            ICandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();

            FastIDSet candidates = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)modelMock.MockInstance);

            Assert.AreEqual(1, candidates.Count());
            Assert.True(candidates.Contains(2L));

            // Confirms that the expected calls were made on the mock.
            modelMock.Verify();
        }
Exemplo n.º 18
0
        /// <summary>
        /// Checks that <c>SamplingCandidateItemsStrategy</c>, constructed with 1, 1, 1 and the model's
        /// user and item counts, returns at most one candidate for user 123 and never item 1, which
        /// that user already has.
        /// </summary>
        public void testStrategy()
        {
            List <IPreference> user123Prefs = new List <IPreference>
            {
                new GenericPreference(123L, 1L, 1.0f)
            };
            List <IPreference> user456Prefs = new List <IPreference>
            {
                new GenericPreference(456L, 1L, 1.0f),
                new GenericPreference(456L, 2L, 1.0f)
            };
            List <IPreference> user789Prefs = new List <IPreference>
            {
                new GenericPreference(789L, 1L, 0.5f),
                new GenericPreference(789L, 3L, 1.0f)
            };

            IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(user123Prefs);

            // Three users in total; only user 123's array is needed again below.
            FastByIDMap <IPreferenceArray> prefsByUser = new FastByIDMap <IPreferenceArray>();
            prefsByUser.Put(123L, prefArrayOfUser123);
            prefsByUser.Put(456L, new GenericUserPreferenceArray(user456Prefs));
            prefsByUser.Put(789L, new GenericUserPreferenceArray(user789Prefs));

            IDataModel model = new GenericDataModel(prefsByUser);

            int userCount = model.GetNumUsers();
            int itemCount = model.GetNumItems();
            ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, userCount, itemCount);

            FastIDSet candidates = strategy.GetCandidateItems(123L, prefArrayOfUser123, model);

            Assert.True(candidates.Count() <= 1);
            Assert.False(candidates.Contains(1L));
        }
Exemplo n.º 19
0
        /// <summary>
        /// Asks the configured candidate strategy for the candidate items of <paramref name="itemIDs"/>
        /// and returns the result of <c>TopItems.GetTopItems</c> over those candidates.
        /// </summary>
        /// <param name="itemIDs">Items for which similar items are wanted.</param>
        /// <param name="howMany">Count forwarded to <c>TopItems.GetTopItems</c>.</param>
        /// <param name="estimator">Estimator forwarded to <c>TopItems.GetTopItems</c>.</param>
        /// <returns>The list produced by <c>TopItems.GetTopItems</c>.</returns>
        private List <IRecommendedItem> doMostSimilarItems(long[] itemIDs,
                                                           int howMany,
                                                           TopItems.IEstimator <long> estimator)
        {
            var candidates = mostSimilarItemsCandidateItemsStrategy
                             .GetCandidateItems(itemIDs, GetDataModel())
                             .GetEnumerator();

            // No rescorer is supplied (third argument is null).
            return TopItems.GetTopItems(howMany, candidates, null, estimator);
        }
Exemplo n.º 20
0
 /// <summary>
 /// Checks that a set created with size argument 1 still contains both values after two are added.
 /// </summary>
 public void testGrow()
 {
     FastIDSet ids = new FastIDSet(1);

     // Adding a second value exceeds the size passed to the constructor.
     ids.Add(1);
     ids.Add(2);

     Assert.True(ids.Contains(1));
     Assert.True(ids.Contains(2));
 }
 /// <summary>
 /// Collects the IDs of all items similar to any preferred item, then removes the preferred items.
 /// </summary>
 /// <param name="preferredItemIDs">Items the candidates are derived from.</param>
 /// <param name="dataModel">Not used by this implementation.</param>
 /// <returns>The collected candidate item IDs.</returns>
 protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
   FastIDSet candidates = new FastIDSet();
   for (int i = 0; i < preferredItemIDs.Length; i++) {
     long[] similarIDs = similarity.AllSimilarItemIDs(preferredItemIDs[i]);
     candidates.AddAll(similarIDs);
   }
   // The preferred items themselves are never candidates.
   candidates.RemoveAll(preferredItemIDs);
   return candidates;
 }
Exemplo n.º 22
0
        /// <summary>
        /// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences).
        /// Preferences are additionally indexed by item, the minimum and maximum preference values are
        /// recorded, and sorted arrays of all item IDs and user IDs are stored.
        /// </summary>
        /// <param name="userData">Preference arrays keyed by user ID; must not be null. Each array is sorted by item in place.</param>
        /// <param name="timestamps">Optional timestamps of preferences: user IDs mapped to maps of item IDs to nullable <see cref="DateTime"/> values. Stored as given.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="userData"/> is null.</exception>
        public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
        {
            // Fail fast with a descriptive error instead of a NullReferenceException further down.
            if (userData == null)
            {
                throw new ArgumentNullException("userData", "userData is null");
            }

            this.preferenceFromUsers = userData;
            FastByIDMap<IList<IPreference>> prefsForItems = new FastByIDMap<IList<IPreference>>();
            FastIDSet itemIDSet = new FastIDSet();
            int currentCount = 0;
            float maxPrefValue = float.NegativeInfinity;
            float minPrefValue = float.PositiveInfinity;
            foreach (var entry in preferenceFromUsers.EntrySet())
            {
                IPreferenceArray prefs = entry.Value;
                prefs.SortByItem();
                foreach (IPreference preference in prefs)
                {
                    long itemID = preference.GetItemID();
                    itemIDSet.Add(itemID);

                    // Create the per-item list the first time an item is seen.
                    var prefsForItem = prefsForItems.Get(itemID);
                    if (prefsForItem == null)
                    {
                        prefsForItem = new List<IPreference>(2);
                        prefsForItems.Put(itemID, prefsForItem);
                    }
                    prefsForItem.Add(preference);

                    // Track the range of preference values seen so far.
                    float value = preference.GetValue();
                    if (value > maxPrefValue)
                    {
                        maxPrefValue = value;
                    }
                    if (value < minPrefValue)
                    {
                        minPrefValue = value;
                    }
                }

                // Progress message every 10000 users.
                if (++currentCount % 10000 == 0)
                {
                    log.Info("Processed {0} users", currentCount);
                }
            }
            log.Info("Processed {0} users", currentCount);

            setMinPreference(minPrefValue);
            setMaxPreference(maxPrefValue);

            this.itemIDs = itemIDSet.ToArray();
            itemIDSet = null; // Might help GC -- this is big
            Array.Sort(itemIDs);

            this.preferenceForItems = ToDataMap(prefsForItems, false);

            foreach (var entry in preferenceForItems.EntrySet())
            {
                entry.Value.SortByUser();
            }

            this.userIDs = new long[userData.Count()];
            int i = 0;
            foreach (var v in userData.Keys)
            {
                userIDs[i++] = v;
            }
            Array.Sort(userIDs);

            this.timestamps = timestamps;
        }
 /// <summary>
 /// Returns every item ID of the data model except the preferred ones.
 /// </summary>
 /// <param name="preferredItemIDs">Item IDs to exclude from the result.</param>
 /// <param name="dataModel">Model supplying the item count and the item IDs.</param>
 /// <returns>All item IDs of the model minus <paramref name="preferredItemIDs"/>.</returns>
 protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
   // The model's item count is passed as the set's size argument.
   FastIDSet candidates = new FastIDSet(dataModel.GetNumItems());
   for (var itemIDs = dataModel.GetItemIDs(); itemIDs.MoveNext();) {
     candidates.Add(itemIDs.Current);
   }
   candidates.RemoveAll(preferredItemIDs);
   return candidates;
 }
Exemplo n.º 24
0
 /// <summary>
 /// Checks that clearing a set leaves it empty: zero count, <c>IsEmpty</c> true, value no longer contained.
 /// </summary>
 public void testClear()
 {
     FastIDSet ids = new FastIDSet();
     ids.Add(1);

     ids.Clear();

     Assert.AreEqual(0, ids.Count());
     Assert.True(ids.IsEmpty());
     Assert.False(ids.Contains(1));
 }
 /// <summary>
 /// Collects the item IDs of every user in the neighborhood, then removes the items of the given user.
 /// </summary>
 /// <param name="theNeighborhood">IDs of the neighboring users.</param>
 /// <param name="theUserID">ID of the user whose own items are removed from the result.</param>
 /// <returns>Items of the neighborhood that <paramref name="theUserID"/> does not have.</returns>
 protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID) {
   IDataModel model = GetDataModel();
   FastIDSet otherItems = new FastIDSet();
   for (int i = 0; i < theNeighborhood.Length; i++) {
     FastIDSet neighborItems = model.GetItemIDsFromUser(theNeighborhood[i]);
     otherItems.AddAll(neighborItems);
   }
   FastIDSet ownItems = model.GetItemIDsFromUser(theUserID);
   otherItems.RemoveAll(ownItems);
   return otherItems;
 }
Exemplo n.º 26
0
        /// <summary>
        /// Runs the base preparation, then allocates and initializes the <c>p</c> (one row per user)
        /// and <c>y</c> (one row per item) matrices and builds <c>itemsByUser</c>.
        /// In every row the first <c>FEATURE_OFFSET</c> entries are set to 0.0 and the remaining
        /// entries to a Gaussian random value multiplied by <c>randomNoise</c>.
        /// </summary>
        protected override void prepareTraining()
        {
            base.prepareTraining();
            RandomWrapper random = RandomUtils.getRandom();

            // One row per user.
            this.p = new double[base.dataModel.getNumUsers()][];
            for (int row = 0; row < this.p.Length; row++)
            {
                this.p[row] = new double[base.numFeatures];
                for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
                {
                    this.p[row][feature] = 0.0;
                }
                for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
                {
                    this.p[row][feature] = random.nextGaussian() * base.randomNoise;
                }
            }

            // One row per item, initialized the same way.
            this.y = new double[base.dataModel.getNumItems()][];
            for (int row = 0; row < this.y.Length; row++)
            {
                this.y[row] = new double[base.numFeatures];
                for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
                {
                    this.y[row][feature] = 0.0;
                }
                for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
                {
                    this.y[row][feature] = random.nextGaussian() * base.randomNoise;
                }
            }

            // Map each user index to the indexes of the items that user has.
            this.itemsByUser = new Dictionary <int, List <int> >();
            IEnumerator <long> userIDs = base.dataModel.getUserIDs();
            while (userIDs.MoveNext())
            {
                long       userID      = userIDs.Current;
                int        userIdx     = base.userIndex(userID);
                FastIDSet  userItems   = base.dataModel.getItemIDsFromUser(userID);
                List <int> itemIndexes = new List <int>(userItems.size());
                this.itemsByUser[userIdx] = itemIndexes;
                foreach (long itemID in userItems)
                {
                    itemIndexes.Add(base.itemIndex(itemID));
                }
            }
        }
        /// <summary>
        /// Looks up the item IDs stored for the given user.
        /// </summary>
        /// <param name="userID">ID of the user.</param>
        /// <returns>The item ID set stored for the user.</returns>
        /// <exception cref="NoSuchUserException">Thrown when nothing is stored for <paramref name="userID"/>.</exception>
        public override FastIDSet GetItemIDsFromUser(long userID)
        {
            FastIDSet found = preferenceFromUsers.Get(userID);
            if (found != null)
            {
                return found;
            }
            throw new NoSuchUserException(userID);
        }
Exemplo n.º 28
0
        /// <summary>
        /// Looks up the item IDs stored for the given user.
        /// </summary>
        /// <param name="userID">ID of the user.</param>
        /// <returns>The item ID set stored for the user.</returns>
        /// <exception cref="NoSuchUserException">Thrown when nothing is stored for <paramref name="userID"/>.</exception>
        public override FastIDSet getItemIDsFromUser(long userID)
        {
            FastIDSet found = this.preferenceFromUsers.get(userID);
            if (found != null)
            {
                return found;
            }
            throw new NoSuchUserException(userID);
        }
Exemplo n.º 29
0
        /// <summary>
        /// Computes the sizes of both users' item sets and of their intersection and passes them
        /// to <c>doSimilarity</c>.
        /// </summary>
        /// <param name="userID1">ID of the first user.</param>
        /// <param name="userID2">ID of the second user.</param>
        /// <returns>The value returned by <c>doSimilarity</c>.</returns>
        public double UserSimilarity(long userID1, long userID2)
        {
            IDataModel model       = getDataModel();
            FastIDSet  firstItems  = model.GetItemIDsFromUser(userID1);
            FastIDSet  secondItems = model.GetItemIDsFromUser(userID2);
            int        firstCount  = firstItems.Count();
            int        secondCount = secondItems.Count();

            // The larger set is always the receiver of IntersectionSize.
            int shared;
            if (firstCount < secondCount)
            {
                shared = secondItems.IntersectionSize(firstItems);
            }
            else
            {
                shared = firstItems.IntersectionSize(secondItems);
            }

            return doSimilarity(firstCount, secondCount, shared);
        }
Exemplo n.º 30
0
        /// <summary>
        /// Repeatedly merges the nearest pair of clusters in <paramref name="newClusters"/>, in place.
        /// With <c>clusteringByThreshold</c> set, merging continues while the nearest pair's similarity
        /// is at least <c>clusteringThreshold</c>; otherwise it continues while there are more than
        /// <c>numClusters</c> clusters. Either mode stops when no nearest pair is found.
        /// </summary>
        /// <param name="newClusters">Cluster list to reduce; merged clusters are appended to it.</param>
        private void findClusters(List <FastIDSet> newClusters)
        {
            if (clusteringByThreshold)
            {
                KeyValuePair <FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
                FastIDSet left  = nearest.Key;
                FastIDSet right = nearest.Value;

                // The similarity is only evaluated when a complete pair was found.
                while (left != null && right != null &&
                       clusterSimilarity.getSimilarity(left, right) >= clusteringThreshold)
                {
                    newClusters.Remove(left);
                    newClusters.Remove(right);
                    FastIDSet union = new FastIDSet(left.size() + right.size());
                    union.addAll(left);
                    union.addAll(right);
                    newClusters.Add(union);

                    nearest = findNearestClusters(newClusters);
                    left    = nearest.Key;
                    right   = nearest.Value;
                }
                return;
            }

            while (newClusters.Count > numClusters)
            {
                KeyValuePair <FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
                FastIDSet left  = nearest.Key;
                FastIDSet right = nearest.Value;
                if (left == null || right == null)
                {
                    // No usable pair left to merge.
                    break;
                }

                newClusters.Remove(left);
                newClusters.Remove(right);
                FastIDSet union = new FastIDSet(left.size() + right.size());
                union.addAll(left);
                union.addAll(right);
                newClusters.Add(union);
            }
        }
Exemplo n.º 31
0
        /// <summary>
        /// Gathers every item ID that <c>similarity</c> reports as similar to one of the
        /// preferred items, then removes the preferred items themselves.
        /// </summary>
        /// <param name="preferredItemIDs">Item IDs used as seeds.</param>
        /// <param name="dataModel">Not used by this implementation.</param>
        /// <returns>The collected item IDs minus <paramref name="preferredItemIDs"/>.</returns>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel)
        {
            FastIDSet candidates = new FastIDSet();

            for (int i = 0; i < preferredItemIDs.Length; i++)
            {
                long seedID = preferredItemIDs[i];
                candidates.addAll(this.similarity.allSimilarItemIDs(seedID));
            }

            candidates.removeAll(preferredItemIDs);
            return candidates;
        }
Exemplo n.º 32
0
        /// <summary>
        /// Records <paramref name="toItemID"/> in the set stored under
        /// <paramref name="fromItemID"/> in <c>similarItemIDsIndex</c>, creating and
        /// registering that set on first use.
        /// </summary>
        private void doIndex(long fromItemID, long toItemID)
        {
            FastIDSet targets = similarItemIDsIndex.Get(fromItemID);
            bool firstLink = targets == null;

            if (firstLink)
            {
                // No entry yet for this source item: register an empty set before filling it.
                targets = new FastIDSet();
                similarItemIDsIndex.Put(fromItemID, targets);
            }

            targets.Add(toItemID);
        }
 /// <summary>
 /// Returns every item ID from <c>dataModel</c> whose <c>ItemSimilarity</c> to
 /// <paramref name="itemID"/> is not NaN.
 /// </summary>
 public virtual long[] AllSimilarItemIDs(long itemID) {
   FastIDSet similarIDs = new FastIDSet();
   var candidateIDs = dataModel.GetItemIDs();
   while (candidateIDs.MoveNext()) {
     long candidateID = candidateIDs.Current;
     double score = ItemSimilarity(itemID, candidateID);
     if (Double.IsNaN(score)) {
       // NaN means no similarity could be reported for this pair, so skip it.
       continue;
     }
     similarIDs.Add(candidateID);
   }
   return similarIDs.ToArray();
 }
Exemplo n.º 34
0
        /// <summary>
        /// Builds the candidate set from the items that <c>similarity</c> lists as similar to
        /// each preferred item, excluding the preferred items themselves.
        /// </summary>
        /// <param name="preferredItemIDs">Item IDs used as seeds.</param>
        /// <param name="dataModel">Not used by this implementation.</param>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
        {
            FastIDSet collected = new FastIDSet();

            int position = 0;
            while (position < preferredItemIDs.Length)
            {
                collected.AddAll(similarity.AllSimilarItemIDs(preferredItemIDs[position]));
                position++;
            }

            collected.RemoveAll(preferredItemIDs);
            return collected;
        }
Exemplo n.º 35
0
        /// <summary>
        /// Adds <paramref name="toItemID"/> to the set kept for <paramref name="fromItemID"/>
        /// in <c>similarItemIDsIndex</c>; a new set is created and stored if none exists yet.
        /// </summary>
        private void doIndex(long fromItemID, long toItemID)
        {
            FastIDSet linked = this.similarItemIDsIndex.get(fromItemID);

            if (linked != null)
            {
                linked.add(toItemID);
                return;
            }

            // First link from this item: store the fresh set, then fill it.
            linked = new FastIDSet();
            this.similarItemIDsIndex.put(fromItemID, linked);
            linked.add(toItemID);
        }
        /// <summary>
        /// Stores <paramref name="prefs"/> under <paramref name="anonymousUserID"/> in
        /// <c>tempPrefs</c> and records the set of item IDs they cover in <c>prefItemIDs</c>.
        /// </summary>
        public void setTempPrefs(PreferenceArray prefs, long anonymousUserID)
        {
            this.tempPrefs[anonymousUserID] = prefs;

            FastIDSet coveredItems = new FastIDSet();
            int index = 0;
            while (index < prefs.length())
            {
                coveredItems.add(prefs.getItemID(index));
                index++;
            }

            this.prefItemIDs[anonymousUserID] = coveredItems;
        }
Exemplo n.º 37
0
        /// <summary>
        /// Looks up both users' item-ID sets, measures their sizes and the size of their
        /// intersection, and passes those three numbers to <c>doSimilarity</c>.
        /// </summary>
        /// <returns>Whatever <c>doSimilarity</c> returns for the two sizes and the overlap.</returns>
        public double userSimilarity(long userID1, long userID2)
        {
            DataModel source      = base.getDataModel();
            FastIDSet firstItems  = source.getItemIDsFromUser(userID1);
            FastIDSet secondItems = source.getItemIDsFromUser(userID2);
            int firstCount  = firstItems.size();
            int secondCount = secondItems.size();

            // The larger set is always the receiver of intersectionSize, the smaller the argument.
            int overlap;
            if (firstCount < secondCount)
            {
                overlap = secondItems.intersectionSize(firstItems);
            }
            else
            {
                overlap = firstItems.intersectionSize(secondItems);
            }

            return doSimilarity(firstCount, secondCount, overlap);
        }
 /// <summary>
 /// For each preferred item, finds the users with a preference for it and collects all
 /// item IDs of those users; the preferred items are removed from the result.
 /// </summary>
 protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
   FastIDSet candidates = new FastIDSet();
   for (int p = 0; p < preferredItemIDs.Length; p++) {
     IPreferenceArray prefsForItem = dataModel.GetPreferencesForItem(preferredItemIDs[p]);
     int userCount = prefsForItem.Length();
     int u = 0;
     while (u < userCount) {
       var coUserID = prefsForItem.GetUserID(u);
       candidates.AddAll(dataModel.GetItemIDsFromUser(coUserID));
       u++;
     }
   }
   candidates.RemoveAll(preferredItemIDs);
   return candidates;
 }
        /// <summary>
        /// Returns every item ID known to <paramref name="dataModel"/> except the ones in
        /// <paramref name="preferredItemIDs"/>.
        /// </summary>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
        {
            // Size the set from the model's item count, then copy each ID in.
            FastIDSet remaining = new FastIDSet(dataModel.GetNumItems());

            for (var cursor = dataModel.GetItemIDs(); cursor.MoveNext();)
            {
                remaining.Add(cursor.Current);
            }

            remaining.RemoveAll(preferredItemIDs);
            return remaining;
        }
Exemplo n.º 40
0
        /// <summary>
        /// Builds a set containing the item ID of every preference returned by
        /// <c>GetPreferencesFromUser</c> for <paramref name="userID"/>.
        /// </summary>
        public override FastIDSet GetItemIDsFromUser(long userID)
        {
            IPreferenceArray userPrefs = GetPreferencesFromUser(userID);
            int count = userPrefs.Length();
            FastIDSet itemSet = new FastIDSet(count);

            int index = 0;
            while (index < count)
            {
                itemSet.Add(userPrefs.GetItemID(index));
                index++;
            }

            return itemSet;
        }
Exemplo n.º 41
0
        /// <summary>
        /// Collects the item IDs of every user in <paramref name="theNeighborhood"/> and then
        /// removes the item IDs of <paramref name="theUserID"/>.
        /// </summary>
        protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID)
        {
            DataModel data = this.getDataModel();
            FastIDSet neighborItems = new FastIDSet();

            for (int i = 0; i < theNeighborhood.Length; i++)
            {
                neighborItems.addAll(data.getItemIDsFromUser(theNeighborhood[i]));
            }

            var ownItems = data.getItemIDsFromUser(theUserID);
            neighborItems.removeAll(ownItems);
            return neighborItems;
        }
Exemplo n.º 42
0
        /// <summary>
        /// Returns the set of item IDs found in the preference array that
        /// <c>getPreferencesFromUser</c> yields for <paramref name="userID"/>.
        /// </summary>
        public override FastIDSet getItemIDsFromUser(long userID)
        {
            PreferenceArray userPrefs = this.getPreferencesFromUser(userID);
            int total = userPrefs.length();
            FastIDSet ids = new FastIDSet(total);

            int position = 0;
            while (position < total)
            {
                ids.add(userPrefs.getItemID(position));
                position++;
            }

            return ids;
        }
 /// <summary>
 /// Picks up to <paramref name="at"/> item IDs from the user's preferences whose value is
 /// at least <paramref name="relevanceThreshold"/>, scanning in the order left by
 /// <c>SortByValueReversed</c>.
 /// </summary>
 /// <remarks>
 /// <c>SortByValueReversed</c> is invoked on the array returned by the data model itself.
 /// </remarks>
 public FastIDSet GetRelevantItemsIDs(long userID,
                                      int at,
                                      double relevanceThreshold,
                                      IDataModel dataModel) {
   IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
   FastIDSet relevant = new FastIDSet(at);
   userPrefs.SortByValueReversed();
   for (int index = 0; index < userPrefs.Length(); index++) {
     if (relevant.Count() >= at) {
       // Quota reached.
       break;
     }
     if (userPrefs.GetValue(index) >= relevanceThreshold) {
       relevant.Add(userPrefs.GetItemID(index));
     }
   }
   return relevant;
 }
Exemplo n.º 44
0
 /// <summary>
 /// Builds a <c>GenericBooleanPrefDataModel</c> from a boolean matrix: for user
 /// <c>userIDs[i]</c>, every index <c>j</c> with <c>prefs[i][j] == true</c> becomes a
 /// preferred item ID. Users with no true entry are left out.
 /// </summary>
 public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
 {
     FastByIDMap<FastIDSet> itemsByUser = new FastByIDMap<FastIDSet>();
     for (int u = 0; u < userIDs.Length; u++) {
       FastIDSet liked = new FastIDSet();
       bool[] flags = prefs[u];
       for (int item = 0; item < flags.Length; item++) {
         if (!flags[item]) {
           continue;
         }
         liked.Add(item);
       }
       if (liked.IsEmpty()) {
         continue;
       }
       itemsByUser.Put(userIDs[u], liked);
     }
     return new GenericBooleanPrefDataModel(itemsByUser);
 }
 /// <summary>
 /// Returns the IDs of all other users, drawn from the iterator produced by
 /// <c>MaybeWrapIterator</c>, whose similarity to <paramref name="userID"/> is not NaN
 /// and is at least <c>threshold</c>.
 /// </summary>
 public override long[] GetUserNeighborhood(long userID) {
   IDataModel model = getDataModel();
   FastIDSet neighbors = new FastIDSet();
   var candidates = SamplinglongPrimitiveIterator.MaybeWrapIterator(
       model.GetUserIDs(), getSamplingRate());
   IUserSimilarity similarityMeasure = getUserSimilarity();

   while (candidates.MoveNext()) {
     long candidateID = candidates.Current;
     if (candidateID == userID) {
       // A user is never their own neighbour.
       continue;
     }

     double score = similarityMeasure.UserSimilarity(userID, candidateID);
     bool qualifies = !Double.IsNaN(score) && score >= threshold;
     if (qualifies) {
       neighbors.Add(candidateID);
     }
   }

   return neighbors.ToArray();
 }
        /// <summary>
        /// Checks that <c>SVDRecommender.Recommend(1L, 5)</c>, wired to mocked collaborators,
        /// returns the two candidate items as item 3 (value 2.0) followed by item 5 (value 1.0).
        /// Also checks that both feature call counters on the factorization mock end at 2.
        /// </summary>
        public void recommend()
        {
            // Dynamic mocks stand in for every collaborator of the recommender.
            var dataModelMock = new DynamicMock( typeof(IDataModel) );
            var preferencesFromUserMock = new DynamicMock( typeof(IPreferenceArray) );
            var candidateItemsStrategyMock = new DynamicMock( typeof(ICandidateItemsStrategy) );
            var factorizerMock = new DynamicMock( typeof(IFactorizer) );
            // Hand-written factorization stub; it exposes the call counters asserted at the end.
            // NOTE(review): the expected scores 2.0 / 1.0 presumably come from this stub -- confirm.
            var factorization = new Factorization_recommend_TestMock();

            // The candidate strategy will offer exactly items 5 and 3.
            FastIDSet candidateItems = new FastIDSet();
            candidateItems.Add(5L);
            candidateItems.Add(3L);

            // "Factorize" is expected to be called and yields the stub factorization.
            factorizerMock.ExpectAndReturn("Factorize", factorization);

            // Preferences of user 1 are requested from the data model.
            dataModelMock.ExpectAndReturn("GetPreferencesFromUser", preferencesFromUserMock.MockInstance, (1L));

            // The strategy is asked for candidates with (userID, prefs, dataModel) and returns the set above.
            candidateItemsStrategyMock.ExpectAndReturn("GetCandidateItems", candidateItems,
            1L, preferencesFromUserMock.MockInstance, dataModelMock.MockInstance);

            //EasyMock.replay(dataModel, candidateItemsStrategy, factorizer, factorization);

            SVDRecommender svdRecommender = new SVDRecommender(
            (IDataModel)dataModelMock.MockInstance,
            (IFactorizer)factorizerMock.MockInstance,
            (ICandidateItemsStrategy)candidateItemsStrategyMock.MockInstance);

            // Ask for up to 5 recommendations; only the 2 candidates can come back.
            IList<IRecommendedItem> recommendedItems = svdRecommender.Recommend(1L, 5);
            Assert.AreEqual(2, recommendedItems.Count);
            Assert.AreEqual(3L, recommendedItems[0].GetItemID());
            Assert.AreEqual(2.0f, recommendedItems[0].GetValue(), EPSILON);
            Assert.AreEqual(5L, recommendedItems[1].GetItemID());
            Assert.AreEqual(1.0f, recommendedItems[1].GetValue(), EPSILON);

            // Every expectation registered above must have been satisfied.
            dataModelMock.Verify();
            candidateItemsStrategyMock.Verify();
            factorizerMock.Verify();

            // Each of the two feature call counters on the stub must read 2.
            Assert.AreEqual(2, factorization.getItemFeaturesCallCount);
            Assert.AreEqual(2, factorization.getUserFeaturesCallCount);
            //EasyMock.verify(dataModel, candidateItemsStrategy, factorizer, factorization);
        }
   /// <summary>
   /// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users (and their
   /// preferences). The model keeps all of this information in memory; note that
   /// <paramref name="userData"/> is retained by reference, not copied.
   /// </summary>
   /// <param name="userData">Users to include, mapped to the set of item IDs each one prefers. Must not be null.</param>
   /// <param name="timestamps">
   /// Optionally, provided timestamps of preferences. User IDs are mapped to maps of item IDs
   /// to timestamps. Stored as given.
   /// </param>
   /// <exception cref="ArgumentNullException">Thrown when <paramref name="userData"/> is null.</exception>
  public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps) {
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // part-way through construction.
    if (userData == null) {
      throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();

    // Build the inverse index (item -> users) while collecting all distinct item IDs.
    FastIDSet itemIDSet = new FastIDSet();
    foreach (var entry in preferenceFromUsers.EntrySet()) {
      long userID = entry.Key;
      FastIDSet itemIDs1 = entry.Value;
      itemIDSet.AddAll(itemIDs1);
      var it = itemIDs1.GetEnumerator();
      while (it.MoveNext()) {
        long itemID = it.Current;
        FastIDSet userIDs1 = preferenceForItems.Get(itemID);
        if (userIDs1 == null) {
          userIDs1 = new FastIDSet(2);
          preferenceForItems.Put(itemID, userIDs1);
        }
        userIDs1.Add(userID);
      }
    }

    // Item IDs are kept as a sorted array.
    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    // User IDs likewise: copy the keys out, then sort.
    this.userIDs = new long[userData.Count()];
    int i = 0;
    var it1 = userData.Keys.GetEnumerator();
    while (it1.MoveNext()) {
      userIDs[i++] = it1.Current;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
  }
  /// <summary>
  /// Adds the preferences of <paramref name="otherUserID"/> to
  /// <paramref name="trainingUsers"/>. When that user is the one under evaluation
  /// (<paramref name="userID"/>), the preferences for items in
  /// <paramref name="relevantItemIDs"/> are withheld, and nothing is added if none remain.
  /// </summary>
  public void ProcessOtherUser(long userID,
                               FastIDSet relevantItemIDs,
                               FastByIDMap<IPreferenceArray> trainingUsers,
                               long otherUserID,
                               IDataModel dataModel) {
    IPreferenceArray otherPrefs = dataModel.GetPreferencesFromUser(otherUserID);

    if (userID != otherUserID) {
      // Any other user contributes all of their preferences unchanged.
      trainingUsers.Put(otherUserID, otherPrefs);
      return;
    }

    // The evaluated user: keep only preferences outside the held-out "relevant" items.
    List<IPreference> kept = new List<IPreference>(otherPrefs.Length());
    foreach (IPreference candidate in otherPrefs) {
      if (relevantItemIDs.Contains(candidate.GetItemID())) {
        continue;
      }
      kept.Add(candidate);
    }

    if (kept.Count > 0) {
      trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
    }
  }
Exemplo n.º 49
0
 /// <summary>
 /// Returns the item IDs of all preferences that <c>GetPreferencesFromUser</c> reports
 /// for <paramref name="userID"/>.
 /// </summary>
 public override FastIDSet GetItemIDsFromUser(long userID) {
   IPreferenceArray userPrefs = GetPreferencesFromUser(userID);
   int total = userPrefs.Length();
   FastIDSet ids = new FastIDSet(total);
   int position = 0;
   while (position < total) {
     ids.Add(userPrefs.GetItemID(position));
     position++;
   }
   return ids;
 }
   /// <summary>
   /// Sets temporary preferences for a given anonymous user.
   /// </summary>
   /// <param name="prefs">
   /// Preferences to store; must not be null. An empty array is accepted.
   /// </param>
   /// <param name="anonymousUserID">Key under which the preferences and their item-ID set are stored.</param>
   /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="prefs"/> is null.</exception>
  public void SetTempPrefs(IPreferenceArray prefs, long anonymousUserID) {
    // Reject null before touching any field: previously null was written into tempPrefs
    // and the call then failed, leaving tempPrefs and prefItemIDs out of sync.
    if (prefs == null) {
      throw new System.ArgumentNullException("prefs", "prefs is null");
    }

    // Build the item-ID set first so both fields are only written once it is complete.
    FastIDSet userPrefItemIDs = new FastIDSet();
    for (int i = 0; i < prefs.Length(); i++) {
      userPrefItemIDs.Add(prefs.GetItemID(i));
    }

    this.tempPrefs[anonymousUserID] = prefs;
    this.prefItemIDs[anonymousUserID] = userPrefItemIDs;
  }
Exemplo n.º 51
0
 /// <summary>
 /// Checks that adding <c>long.MinValue</c> or <c>long.MaxValue</c> to a
 /// <c>FastIDSet</c> throws <c>ArgumentException</c> and leaves the value absent.
 /// </summary>
 public void testReservedValues()
 {
     FastIDSet ids = new FastIDSet();

     // Lower reserved value.
     try {
       ids.Add(long.MinValue);
       Assert.Fail("Should have thrown IllegalArgumentException");
     } catch (ArgumentException) {
       // expected: the value is rejected
     }
     Assert.False(ids.Contains(long.MinValue));

     // Upper reserved value.
     try {
       ids.Add(long.MaxValue);
       Assert.Fail("Should have thrown IllegalArgumentException");
     } catch (ArgumentException) {
       // expected: the value is rejected
     }
     Assert.False(ids.Contains(long.MaxValue));
 }
 /// <summary>
 /// Adds the item IDs of the first <paramref name="max"/> preferences (or all of them,
 /// if there are fewer) to <paramref name="modelSet"/>.
 /// </summary>
 /// <returns>The largest item ID added, or -1 when nothing was added.</returns>
 private static long setBits(FastIDSet modelSet, IPreferenceArray prefs, int max) {
   long highest = -1;
   int index = 0;
   while (index < prefs.Length() && index < max) {
     long current = prefs.GetItemID(index);
     modelSet.Add(current);
     highest = current > highest ? current : highest;
     index++;
   }
   return highest;
 }
  /// <summary>
  /// Collects candidate items through the users who have a preference for each preferred item.
  /// Sampling is applied when there are more than <c>maxItems</c> preferred items or more
  /// than <c>maxUsersPerItem</c> preferences for one item; <c>addSomeOf</c> adds each
  /// user's items.
  /// </summary>
  /// <returns>The collected item IDs with <paramref name="preferredItemIDs"/> removed.</returns>
  protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
    var seedItems = ((IEnumerable<long>)preferredItemIDs).GetEnumerator();
    if (preferredItemIDs.Length > maxItems) {
      // Too many seed items: wrap the enumerator in a sampling iterator.
      double samplingRate = (double) maxItems / preferredItemIDs.Length;
      log.Info("preferredItemIDs.Length {0}, samplingRate {1}", preferredItemIDs.Length, samplingRate);
      seedItems = new SamplinglongPrimitiveIterator(seedItems, samplingRate);
    }

    FastIDSet candidates = new FastIDSet();
    while (seedItems.MoveNext()) {
      long seedID = seedItems.Current;
      IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(seedID);
      int userCount = itemPrefs.Length();

      if (userCount <= maxUsersPerItem) {
        // Few enough users: visit every one of them.
        for (int u = 0; u < userCount; u++) {
          addSomeOf(candidates, dataModel.GetItemIDsFromUser(itemPrefs.GetUserID(u)));
        }
        continue;
      }

      // Otherwise visit a fixed-size sample of the item's preferences.
      var sampled =
          new FixedSizeSamplingIterator<IPreference>(maxUsersPerItem, itemPrefs.GetEnumerator());
      while (sampled.MoveNext()) {
        addSomeOf(candidates, dataModel.GetItemIDsFromUser(sampled.Current.GetUserID()));
      }
    }

    candidates.RemoveAll(preferredItemIDs);
    return candidates;
  }
Exemplo n.º 54
0
 /// <summary>
 /// Runs one million random contains/add/remove operations against both a
 /// <c>FastIDSet</c> and a <c>HashSet&lt;int&gt;</c>, asserting that the two agree on each
 /// result, and on size and emptiness after every mutation.
 /// </summary>
 public void testVersusHashSet()
 {
     FastIDSet fastSet = new FastIDSet(1);
     var reference = new HashSet<int>(); //1000000
     var random = RandomUtils.getRandom();

     for (int round = 0; round < 1000000; round++) {
       double choice = random.nextDouble();
       var key = random.nextInt(100);

       if (choice < 0.4) {
         // Read-only probe: no size check afterwards.
         Assert.AreEqual(reference.Contains(key), fastSet.Contains(key));
         continue;
       }

       if (choice < 0.7) {
         Assert.AreEqual(reference.Add(key), fastSet.Add(key));
       } else {
         Assert.AreEqual(reference.Remove(key), fastSet.Remove(key));
       }

       Assert.AreEqual(reference.Count, fastSet.Count() );
       Assert.AreEqual(reference.Count==0, fastSet.IsEmpty());
     }
 }
Exemplo n.º 55
0
 /// <summary>
 /// Creates a <c>FastIDSet</c> holding the IDs 1, 2 and 3, added in that order.
 /// </summary>
 private static FastIDSet buildTestFastSet()
 {
     FastIDSet fixture = new FastIDSet();
     for (int id = 1; id <= 3; id++) {
       fixture.Add(id);
     }
     return fixture;
 }
 /// <summary>
 /// Wraps <paramref name="deleg"/> and starts with an empty <c>prefItemIDs</c> set.
 /// </summary>
 public PlusAnonymousUserDataModel(IDataModel deleg) {
   prefItemIDs = new FastIDSet();
   _delegate = deleg;
 }
 /// <summary>
 /// Adds the item IDs of the first <paramref name="max"/> recommended items (or all of
 /// them, if there are fewer) to <paramref name="modelSet"/>.
 /// </summary>
 /// <returns>The largest item ID added, or -1 when nothing was added.</returns>
 private static long setBits(FastIDSet modelSet, IList<IRecommendedItem> items, int max) {
   long highest = -1;
   int index = 0;
   while (index < items.Count && index < max) {
     long current = items[index].GetItemID();
     modelSet.Add(current);
     highest = current > highest ? current : highest;
     index++;
   }
   return highest;
 }
 /// <summary>
 /// Adds <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>. When the source
 /// set holds more than <c>maxItemsPerUser</c> entries, only the IDs yielded by a sampling
 /// iterator with rate <c>maxItemsPerUser / size</c> are added.
 /// </summary>
 private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
   if (itemIDs.Count() <= maxItemsPerUser) {
     // Small enough: take everything.
     possibleItemIDs.AddAll(itemIDs);
     return;
   }

   var source = itemIDs.GetEnumerator();
   double keepRate = (double) maxItemsPerUser / itemIDs.Count();
   var sampled = new SamplinglongPrimitiveIterator(source, keepRate);
   while (sampled.MoveNext()) {
     possibleItemIDs.Add(sampled.Current);
   }
 }
Exemplo n.º 59
0
 /// <summary>
 /// Checks <c>Count()</c> and <c>IsEmpty()</c> on a new set, after adding one ID, and
 /// after removing it again.
 /// </summary>
 public void testSizeEmpty()
 {
     FastIDSet ids = new FastIDSet();

     // Fresh set.
     Assert.AreEqual(0, ids.Count());
     Assert.True(ids.IsEmpty());

     // One element.
     ids.Add(1);
     Assert.AreEqual(1, ids.Count());
     Assert.False(ids.IsEmpty());

     // Empty again after removal.
     ids.Remove(1);
     Assert.AreEqual(0, ids.Count());
     Assert.True(ids.IsEmpty());
 }
  /// <summary>
  /// Converts a map of preference arrays into a map of item-ID sets with the same keys:
  /// each value becomes the set of item IDs found in the original preference array.
  /// </summary>
  public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data) {
    var itemsByKey = new FastByIDMap<FastIDSet>( data.Count() );
    foreach (var pair in data.EntrySet()) {
      IPreferenceArray sourcePrefs = pair.Value;
      int prefCount = sourcePrefs.Length();
      FastIDSet ids = new FastIDSet(prefCount);
      int position = 0;
      while (position < prefCount) {
        ids.Add(sourcePrefs.GetItemID(position));
        position++;
      }
      itemsByKey.Put( pair.Key, ids );
    }
    return itemsByKey;
  }