Beispiel #1
0
        /// <summary>
        /// Scores two users by how much their item ID sets overlap: the value reported by
        /// <c>intersectionSize</c> divided by (size of set 1 + size of set 2 - that value).
        /// </summary>
        /// <param name="userID1">First user; their item IDs are read from the data model.</param>
        /// <param name="userID2">Second user; their item IDs are read from the data model.</param>
        /// <returns>
        /// <c>double.NaN</c> when both sets are empty or the intersection size is zero,
        /// <c>0.0</c> when exactly one set is empty, otherwise the overlap ratio.
        /// </returns>
        public double userSimilarity(long userID1, long userID2)
        {
            DataModel dataModel   = base.getDataModel();
            FastIDSet firstItems  = dataModel.getItemIDsFromUser(userID1);
            FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
            int       firstCount  = firstItems.size();
            int       secondCount = secondItems.size();

            if (firstCount == 0 || secondCount == 0)
            {
                // Both empty: undefined. Only one empty: no overlap is possible.
                if (firstCount == 0 && secondCount == 0)
                {
                    return double.NaN;
                }
                return 0.0;
            }

            // intersectionSize is invoked on the larger set with the smaller one as argument.
            int shared;
            if (firstCount < secondCount)
            {
                shared = secondItems.intersectionSize(firstItems);
            }
            else
            {
                shared = firstItems.intersectionSize(secondItems);
            }

            if (shared == 0)
            {
                return double.NaN;
            }

            int unionCount = (firstCount + secondCount) - shared;
            return ((double)shared) / ((double)unionCount);
        }
        /// <summary>
        /// Runs the base class preparation, then (re)allocates the <c>p</c> and <c>y</c> matrices
        /// and rebuilds <c>itemsByUser</c>.
        /// </summary>
        /// <remarks>
        /// <c>p</c> gets one row per user and <c>y</c> one row per item, each of length
        /// <c>numFeatures</c>. Columns below <c>RatingSGDFactorizer.FEATURE_OFFSET</c> are set to zero;
        /// the remaining columns are set to a Gaussian sample scaled by <c>randomNoise</c>.
        /// <c>itemsByUser</c> maps each user index to the list of item indexes of that user's items.
        /// </remarks>
        protected override void prepareTraining()
        {
            base.prepareTraining();
            RandomWrapper random = RandomUtils.getRandom();

            // One row per user.
            this.p = new double[base.dataModel.getNumUsers()][];
            for (int userRow = 0; userRow < this.p.Length; userRow++)
            {
                this.p[userRow] = new double[base.numFeatures];
                for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
                {
                    this.p[userRow][feature] = 0.0;
                }
                for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
                {
                    this.p[userRow][feature] = random.nextGaussian() * base.randomNoise;
                }
            }

            // One row per item, initialised the same way.
            this.y = new double[base.dataModel.getNumItems()][];
            for (int itemRow = 0; itemRow < this.y.Length; itemRow++)
            {
                this.y[itemRow] = new double[base.numFeatures];
                for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
                {
                    this.y[itemRow][feature] = 0.0;
                }
                for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
                {
                    this.y[itemRow][feature] = random.nextGaussian() * base.randomNoise;
                }
            }

            // Cache, per user index, the internal indexes of that user's items.
            this.itemsByUser = new Dictionary<int, List<int>>();
            IEnumerator<long> userIDs = base.dataModel.getUserIDs();
            while (userIDs.MoveNext())
            {
                long      userID      = userIDs.Current;
                int       userSlot    = base.userIndex(userID);
                FastIDSet userItemIDs = base.dataModel.getItemIDsFromUser(userID);
                List<int> itemSlots   = new List<int>(userItemIDs.size());

                // The list is registered before it is filled.
                this.itemsByUser[userSlot] = itemSlots;
                foreach (long itemID in userItemIDs)
                {
                    itemSlots.Add(base.itemIndex(itemID));
                }
            }
        }
        /// <summary>
        /// Fetches the item ID sets of both users and hands their sizes plus the intersection size
        /// to <c>doSimilarity</c>.
        /// </summary>
        /// <param name="userID1">First user; their item IDs are read from the data model.</param>
        /// <param name="userID2">Second user; their item IDs are read from the data model.</param>
        /// <returns>Whatever <c>doSimilarity</c> returns for the three counts.</returns>
        public double userSimilarity(long userID1, long userID2)
        {
            DataModel dataModel   = base.getDataModel();
            FastIDSet firstItems  = dataModel.getItemIDsFromUser(userID1);
            FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
            int       firstCount  = firstItems.size();
            int       secondCount = secondItems.size();

            // intersectionSize is invoked on the larger set with the smaller one as argument.
            int shared;
            if (firstCount < secondCount)
            {
                shared = secondItems.intersectionSize(firstItems);
            }
            else
            {
                shared = firstItems.intersectionSize(secondItems);
            }

            return doSimilarity(firstCount, secondCount, shared);
        }
        /// <summary>
        /// Repeatedly merges the pair returned by <c>findNearestClusters</c> inside
        /// <paramref name="newClusters"/>, modifying the list in place.
        /// </summary>
        /// <remarks>
        /// When <c>clusteringByThreshold</c> is set, merging continues while the similarity of the
        /// nearest pair is at least <c>clusteringThreshold</c>. Otherwise merging continues while the
        /// list holds more than <c>numClusters</c> entries. In both modes the loop also stops as soon
        /// as either side of the returned pair is null.
        /// </remarks>
        /// <param name="newClusters">Working list of clusters; merged clusters are appended to it.</param>
        private void findClusters(List <FastIDSet> newClusters)
        {
            if (clusteringByThreshold)
            {
                while (true)
                {
                    KeyValuePair<FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
                    FastIDSet left  = nearest.Key;
                    FastIDSet right = nearest.Value;
                    if (left == null || right == null)
                    {
                        break;
                    }

                    // Written as a negated ">=" so a NaN similarity stops the loop.
                    if (!(clusterSimilarity.getSimilarity(left, right) >= clusteringThreshold))
                    {
                        break;
                    }

                    newClusters.Remove(left);
                    newClusters.Remove(right);
                    FastIDSet union = new FastIDSet(left.size() + right.size());
                    union.addAll(left);
                    union.addAll(right);
                    newClusters.Add(union);
                }
                return;
            }

            while (newClusters.Count > numClusters)
            {
                KeyValuePair<FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
                FastIDSet left  = nearest.Key;
                FastIDSet right = nearest.Value;
                if (left == null || right == null)
                {
                    break;
                }

                newClusters.Remove(left);
                newClusters.Remove(right);
                FastIDSet union = new FastIDSet(left.size() + right.size());
                union.addAll(left);
                union.addAll(right);
                newClusters.Add(union);
            }
        }
Beispiel #5
0
 /// <summary>
 /// Adds IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>. When the
 /// source holds more than <c>maxItemsPerUser</c> entries, only the IDs yielded by a
 /// <c>SamplingLongPrimitiveIterator</c> built with rate maxItemsPerUser / size are added;
 /// otherwise every ID is added.
 /// </summary>
 /// <param name="possibleItemIDs">Destination set, modified in place.</param>
 /// <param name="itemIDs">Source set of IDs.</param>
 private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
 {
     if (itemIDs.size() > this.maxItemsPerUser)
     {
         var    source = itemIDs.GetEnumerator();
         double rate   = ((double)this.maxItemsPerUser) / ((double)itemIDs.size());

         SamplingLongPrimitiveIterator sampled = new SamplingLongPrimitiveIterator(source, rate);
         while (sampled.MoveNext())
         {
             possibleItemIDs.add(sampled.Current);
         }
         return;
     }

     possibleItemIDs.addAll(itemIDs);
 }
        /// <summary>
        /// Collects up to <paramref name="at"/> item IDs from the user's preferences whose value is
        /// at least <paramref name="relevanceThreshold"/>.
        /// </summary>
        /// <remarks>
        /// The preference array obtained from the data model is sorted with
        /// <c>sortByValueReversed</c> before it is scanned from index 0.
        /// </remarks>
        /// <param name="userID">User whose preferences are read.</param>
        /// <param name="at">Maximum number of IDs to collect; also passed to the result set's constructor.</param>
        /// <param name="relevanceThreshold">Minimum preference value for an item to be collected.</param>
        /// <param name="dataModel">Source of the user's preferences.</param>
        /// <returns>The collected item IDs (possibly empty).</returns>
        public FastIDSet getRelevantItemsIDs(long userID, int at, double relevanceThreshold, DataModel dataModel)
        {
            PreferenceArray prefs    = dataModel.getPreferencesFromUser(userID);
            FastIDSet       relevant = new FastIDSet(at);

            prefs.sortByValueReversed();

            int index = 0;
            while ((index < prefs.length()) && (relevant.size() < at))
            {
                if (prefs.getValue(index) >= relevanceThreshold)
                {
                    relevant.add(prefs.getItemID(index));
                }
                index++;
            }
            return relevant;
        }
        /// <summary>
        /// Returns the intersection size of the two sets stored in <c>preferenceForItems</c> under
        /// <paramref name="itemID1"/> and <paramref name="itemID2"/>.
        /// </summary>
        /// <param name="itemID1">Key of the first set.</param>
        /// <param name="itemID2">Key of the second set.</param>
        /// <returns>0 when either key has no set; otherwise the intersection size.</returns>
        public override int getNumUsersWithPreferenceFor(long itemID1, long itemID2)
        {
            FastIDSet first = this.preferenceForItems.get(itemID1);
            if (first == null)
            {
                return 0;
            }

            FastIDSet second = this.preferenceForItems.get(itemID2);
            if (second == null)
            {
                return 0;
            }

            // intersectionSize is invoked on the larger set with the smaller one as argument.
            if (first.size() < second.size())
            {
                return second.intersectionSize(first);
            }
            return first.intersectionSize(second);
        }
Beispiel #8
0
        /// <summary>
        /// Scores two users from the value of <c>LogLikelihood.logLikelihoodRatio</c> over four counts
        /// derived from their item ID sets, mapped to <c>1 - 1 / (1 + ratio)</c>.
        /// </summary>
        /// <param name="userID1">First user; their item IDs are read from the data model.</param>
        /// <param name="userID2">Second user; their item IDs are read from the data model.</param>
        /// <returns><c>double.NaN</c> when the intersection size is zero; otherwise the mapped score.</returns>
        public double userSimilarity(long userID1, long userID2)
        {
            DataModel dataModel   = base.getDataModel();
            FastIDSet firstItems  = dataModel.getItemIDsFromUser(userID1);
            FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
            long      firstCount  = firstItems.size();
            long      secondCount = secondItems.size();

            // intersectionSize is invoked on the larger set with the smaller one as argument.
            long shared;
            if (firstCount < secondCount)
            {
                shared = (long)secondItems.intersectionSize(firstItems);
            }
            else
            {
                shared = (long)firstItems.intersectionSize(secondItems);
            }

            if (shared == 0L)
            {
                return double.NaN;
            }

            long totalItems = dataModel.getNumItems();

            // Counts passed on: shared, only in the second set, only in the first set, in neither set.
            long onlySecond = secondCount - shared;
            long onlyFirst  = firstCount - shared;
            long neither    = ((totalItems - firstCount) - secondCount) + shared;

            double ratio = LogLikelihood.logLikelihoodRatio(shared, onlySecond, onlyFirst, neither);
            return 1.0 - (1.0 / (1.0 + ratio));
        }
        /// <summary>
        /// Builds the recommendation list for one cluster: gathers the item IDs of every ID in the
        /// cluster, then asks <c>TopItems.getTopItems</c> to rank them with an <c>Estimator</c>
        /// created for this cluster.
        /// </summary>
        /// <param name="cluster">IDs passed one by one to <c>getItemIDsFromUser</c>.</param>
        /// <returns>The list returned by <c>TopItems.getTopItems</c>.</returns>
        private List <RecommendedItem> computeTopRecsForCluster(FastIDSet cluster)
        {
            DataModel model      = getDataModel();
            FastIDSet candidates = new FastIDSet();

            for (var members = cluster.GetEnumerator(); members.MoveNext();)
            {
                FastIDSet memberItems = model.getItemIDsFromUser(members.Current);
                candidates.addAll(memberItems);
            }

            TopItems.Estimator<long> estimator = new Estimator(this, cluster);

            // The candidate count is used as the "how many" argument.
            int howMany = candidates.size();
            List<RecommendedItem> recommendations =
                TopItems.getTopItems(howMany, candidates.GetEnumerator(), null, estimator);

            log.debug("Recommendations are: {}", recommendations);
            return recommendations;
        }
        /// <summary>
        /// Builds a <c>BooleanUserPreferenceArray</c> holding one entry per ID stored for
        /// <paramref name="userID"/> in <c>preferenceFromUsers</c>.
        /// </summary>
        /// <param name="userID">User whose stored IDs are converted.</param>
        /// <returns>An array whose i-th entry carries <paramref name="userID"/> and the i-th enumerated ID.</returns>
        /// <exception cref="NoSuchUserException">No set is stored for <paramref name="userID"/>.</exception>
        public override PreferenceArray getPreferencesFromUser(long userID)
        {
            FastIDSet itemIDs = this.preferenceFromUsers.get(userID);
            if (itemIDs == null)
            {
                throw new NoSuchUserException(userID);
            }

            PreferenceArray prefs = new BooleanUserPreferenceArray(itemIDs.size());
            int slot = 0;
            for (IEnumerator<long> ids = itemIDs.GetEnumerator(); ids.MoveNext(); slot++)
            {
                prefs.setUserID(slot, userID);
                prefs.setItemID(slot, ids.Current);
            }
            return prefs;
        }
        /// <summary>
        /// Merges cluster pairs taken from the list returned by <c>findClosestClusters</c> until that
        /// list is exhausted or a stop condition is reached. <paramref name="clusters"/> is modified
        /// in place.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this assumes <c>findClosestClusters</c> returns pairs ordered by descending
        /// similarity (the head is treated as the closest pair and the tail as the weakest) - confirm.
        /// </remarks>
        /// <param name="numUsers">Forwarded to <c>findClosestClusters</c>.</param>
        /// <param name="clusters">Current clusters; merged clusters replace their two sources.</param>
        /// <param name="done">Incoming "finished" flag, returned unchanged unless a stop condition fires.</param>
        /// <returns>
        /// <c>true</c> when a stop condition fired: either the cluster count reached <c>numClusters</c>
        /// (count mode) or the best remaining pair fell below <c>clusteringThreshold</c> (threshold mode).
        /// Otherwise the value of <paramref name="done"/>.
        /// </returns>
        private bool mergeClosestClusters(int numUsers, List <FastIDSet> clusters, bool done)
        {
            // We find a certain number of closest clusters...
            List <ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);

            // The first one is definitely the closest pair in existence so we can cluster
            // the two together, put it back into the set of clusters, and start again. Instead
            // we assume everything else in our list of closest cluster pairs is still pretty good,
            // and we cluster them too.
            while (queue.Count > 0)
            {
                if (!clusteringByThreshold && clusters.Count <= numClusters)
                {
                    done = true;
                    break;
                }

                // Always consume the head of the queue; an advancing index would skip pairs
                // because removal shifts the remaining elements down.
                ClusterClusterPair top = queue[0];
                queue.RemoveAt(0);

                if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold)
                {
                    done = true;
                    break;
                }

                FastIDSet cluster1 = top.getCluster1();
                FastIDSet cluster2 = top.getCluster2();

                // Pull the two source clusters out of the list. Identity comparison is intentional:
                // two distinct clusters with equal contents must not be confused.
                bool removed1 = false;
                bool removed2 = false;
                for (int m = 0; m < clusters.Count && !(removed1 && removed2); m++)
                {
                    FastIDSet current = clusters[m];
                    if (!removed1 && object.ReferenceEquals(cluster1, current))
                    {
                        clusters.RemoveAt(m);
                        m--;
                        removed1 = true;
                    }
                    else if (!removed2 && object.ReferenceEquals(cluster2, current))
                    {
                        clusters.RemoveAt(m);
                        m--;
                        removed2 = true;
                    }
                }

                // The only catch is if a cluster showed up twice in the list of best cluster pairs;
                // drop every queued pair that still references one of the merged clusters.
                // Iterating backwards keeps the indexes valid while removing.
                for (int k = queue.Count - 1; k >= 0; k--)
                {
                    ClusterClusterPair pair  = queue[k];
                    FastIDSet          pair1 = pair.getCluster1();
                    FastIDSet          pair2 = pair.getCluster2();
                    if (object.ReferenceEquals(pair1, cluster1) || object.ReferenceEquals(pair1, cluster2) ||
                        object.ReferenceEquals(pair2, cluster1) || object.ReferenceEquals(pair2, cluster2))
                    {
                        queue.RemoveAt(k);
                    }
                }

                // Make new merged cluster
                FastIDSet merged = new FastIDSet(cluster1.size() + cluster2.size());
                merged.addAll(cluster1);
                merged.addAll(cluster2);

                // Compare against other clusters; update queue if needed.
                // The new cluster might be pretty close to something else, so catch that case
                // here and put the pair into the queue at its sorted position.
                // With an empty queue there is no weakest pair to compare against, so nothing is
                // re-queued (and the tail lookup below cannot go out of range).
                if (queue.Count > 0)
                {
                    for (int i = 0; i < clusters.Count; i++)
                    {
                        FastIDSet cluster    = clusters[i];
                        double    similarity = clusterSimilarity.getSimilarity(merged, cluster);
                        if (similarity > queue[queue.Count - 1].getSimilarity())
                        {
                            // Insert before the first queued pair that is weaker than the new one.
                            // Such a pair exists because the tail is weaker.
                            int insertAt = 0;
                            while (insertAt < queue.Count && !(similarity > queue[insertAt].getSimilarity()))
                            {
                                insertAt++;
                            }
                            queue.Insert(insertAt, new ClusterClusterPair(merged, cluster, similarity));
                        }
                    }
                }

                // Finally add new cluster to list
                clusters.Add(merged);
            }
            return(done);
        }
        /// <summary>
        /// Evaluates a recommender over a random sample of users and aggregates precision, recall,
        /// fall-out, nDCG and reach into an <c>IRStatisticsImpl</c>.
        /// </summary>
        /// <remarks>
        /// For each sampled user the relevant item IDs are obtained from <c>dataSplitter</c>, a training
        /// model is built from what <c>dataSplitter.processOtherUser</c> puts into the training map, and
        /// the recommender built on that model is asked for <paramref name="at"/> recommendations.
        /// Users are skipped when they have no relevant items, when the training model does not know
        /// them, or when relevant items plus training items number fewer than 2 * <paramref name="at"/>.
        /// </remarks>
        /// <param name="recommenderBuilder">Builds the recommender from each training model.</param>
        /// <param name="dataModelBuilder">Builds the training model; when null a <c>GenericDataModel</c> is used.</param>
        /// <param name="dataModel">Full data set to evaluate against.</param>
        /// <param name="rescorer">Passed through to <c>recommend</c>.</param>
        /// <param name="at">Number of recommendations requested per user.</param>
        /// <param name="relevanceThreshold">Relevance cut-off; when NaN, <c>computeThreshold</c> is used per user.</param>
        /// <param name="evaluationPercentage">A user is evaluated when a random draw is below this value.</param>
        /// <returns>The averaged statistics; reach is users with recommendations / users evaluated.</returns>
        public IRStatistics evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder, DataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold, double evaluationPercentage)
        {
            int            numItems                 = dataModel.getNumItems();
            RunningAverage precision                = new FullRunningAverage();
            RunningAverage recall                   = new FullRunningAverage();
            RunningAverage fallOut                  = new FullRunningAverage();
            RunningAverage nDCG                     = new FullRunningAverage();
            int            usersEvaluated           = 0;
            int            usersWithRecommendations = 0;

            IEnumerator<long> userIDs = dataModel.getUserIDs();
            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;

                // Random sampling of users; negated "<" keeps the original NaN behaviour.
                if (!(this.random.nextDouble() < evaluationPercentage))
                {
                    continue;
                }

                Stopwatch timer = new Stopwatch();
                timer.Start();

                PreferenceArray userPrefs = dataModel.getPreferencesFromUser(userID);
                double threshold = double.IsNaN(relevanceThreshold) ? computeThreshold(userPrefs) : relevanceThreshold;
                FastIDSet relevantItemIDs = this.dataSplitter.getRelevantItemsIDs(userID, at, threshold, dataModel);
                int numRelevant = relevantItemIDs.size();
                if (numRelevant <= 0)
                {
                    continue;
                }

                // Build the training data from every user in the model.
                FastByIDMap<PreferenceArray> trainingUsers = new FastByIDMap<PreferenceArray>(dataModel.getNumUsers());
                IEnumerator<long> otherUserIDs = dataModel.getUserIDs();
                while (otherUserIDs.MoveNext())
                {
                    this.dataSplitter.processOtherUser(userID, relevantItemIDs, trainingUsers, otherUserIDs.Current, dataModel);
                }

                DataModel trainingModel;
                if (dataModelBuilder == null)
                {
                    trainingModel = new GenericDataModel(trainingUsers);
                }
                else
                {
                    trainingModel = dataModelBuilder.buildDataModel(trainingUsers);
                }

                // Probe only: skip the user when the training model no longer contains them.
                try
                {
                    trainingModel.getPreferencesFromUser(userID);
                }
                catch (NoSuchUserException)
                {
                    continue;
                }

                int totalPrefs = numRelevant + trainingModel.getItemIDsFromUser(userID).size();
                if (totalPrefs < (2 * at))
                {
                    continue;
                }

                Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
                int hits = 0;
                List<RecommendedItem> recommended = recommender.recommend(userID, at, rescorer);
                foreach (RecommendedItem candidate in recommended)
                {
                    if (relevantItemIDs.contains(candidate.getItemID()))
                    {
                        hits++;
                    }
                }
                int numRecommended = recommended.Count;

                // Precision
                if (numRecommended > 0)
                {
                    precision.addDatum(((double)hits) / ((double)numRecommended));
                }

                // Recall
                recall.addDatum(((double)hits) / ((double)numRelevant));

                // Fall-out
                if (numRelevant < totalPrefs)
                {
                    fallOut.addDatum(((double)(numRecommended - hits)) / ((double)(numItems - numRelevant)));
                }

                // nDCG: relevant items contribute their discount, all others contribute nothing.
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int rank = 0; rank < numRecommended; rank++)
                {
                    RecommendedItem ranked   = recommended[rank];
                    double          discount = 1.0 / log2(rank + 2.0);
                    if (relevantItemIDs.contains(ranked.getItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // The ideal list has a relevant item in each of its first numRelevant positions.
                    if (rank < numRelevant)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.addDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                usersEvaluated++;
                if (numRecommended > 0)
                {
                    usersWithRecommendations++;
                }

                timer.Stop();
                log.info("Evaluated with user {} in {}ms", new object[] { userID, timer.ElapsedMilliseconds });
                log.info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}", new object[] { precision.getAverage(), recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(), ((double)usersWithRecommendations) / ((double)usersEvaluated) });
            }

            return new IRStatisticsImpl(precision.getAverage(), recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(), ((double)usersWithRecommendations) / ((double)usersEvaluated));
        }
        /// <summary>
        /// Returns the size of the set stored in <c>preferenceForItems</c> under
        /// <paramref name="itemID"/>.
        /// </summary>
        /// <param name="itemID">Key to look up.</param>
        /// <returns>0 when no set is stored for the key; otherwise the set's size.</returns>
        public override int getNumUsersWithPreferenceFor(long itemID)
        {
            FastIDSet stored = this.preferenceForItems.get(itemID);
            if (stored == null)
            {
                return 0;
            }
            return stored.size();
        }