public override void SetPreference(long userID, long itemID, float value)
        {
            IDataModel dataModel = GetDataModel();
            double     prefDelta;

            try {
                float? oldPref = dataModel.GetPreferenceValue(userID, itemID);
                prefDelta = !oldPref.HasValue ? value : value - oldPref.Value;
            } catch (NoSuchUserException) {
                prefDelta = value;
            }
            base.SetPreference(userID, itemID, value);
            lock (this) {
                // Update (or create) the running average for this item by the preference delta.
                IRunningAverage average = itemAverages.Get(itemID);
                if (average == null)
                {
                    IRunningAverage newAverage = new FullRunningAverage();
                    newAverage.AddDatum(prefDelta);
                    itemAverages.Put(itemID, newAverage);
                }
                else
                {
                    average.ChangeDatum(prefDelta);
                }
            }
        }
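The update above leans on the FullRunningAverage contract: AddDatum folds a new value into the mean and increments the count, while ChangeDatum shifts the mean by a delta without touching the count, which is why only prefDelta is applied once a preference already exists. A minimal sketch of that contract (illustration only, assuming no removal support is needed; not the library implementation):

// Sketch of the running-average contract used above (assumption: illustration
// only, not the actual FullRunningAverage implementation).
public class SimpleRunningAverage
{
    private int count;
    private double average;

    // Fold a new value into the mean and increment the count.
    public void AddDatum(double datum)
    {
        count++;
        average += (datum - average) / count;
    }

    // Shift the mean by a delta without changing the count; used when an
    // existing preference is overwritten and only the difference matters.
    public void ChangeDatum(double delta)
    {
        average += delta / count;
    }

    public int GetCount() { return count; }

    public double GetAverage() { return count == 0 ? double.NaN : average; }
}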
Example #2
        public override double GetEvaluation(IDictionary <User, ICollection <Preference> > testUserPrefs,
                                             Recommender recommender)
        {
            RunningAverage average = new FullRunningAverage();

            foreach (KeyValuePair <User, ICollection <Preference> > entry in testUserPrefs)
            {
                foreach (Preference realPref in entry.Value)
                {
                    User testUser = entry.Key;
                    try
                    {
                        double estimatedPreference =
                            recommender.EstimatePreference(testUser.ID, realPref.Item.ID);
                        if (!double.IsNaN(estimatedPreference))
                        {
                            double diff = realPref.Value - estimatedPreference;
                            average.AddDatum(diff * diff);
                        }
                    }
                    catch (NoSuchElementException nsee)
                    {
                        // It's possible that an item exists in the test data but not training data in which case
                        // NSEE will be thrown. Just ignore it and move on.
                        log.Info("Element exists in test data but not training data: " + testUser.ID, nsee);
                    }
                }
            }
            return(Math.Sqrt(average.Average));
        }
        public void toyExample()
        {
            SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

            // A hold-out test would be better, but this is just a toy example, so we only check that the
            // factorization is close to the original matrix.
            IRunningAverage avg     = new FullRunningAverage();
            var             userIDs = dataModel.GetUserIDs();

            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating   = pref.GetValue();
                    double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
                    double err      = rating - estimate;
                    avg.AddDatum(err * err);
                }
            }

            double rmse = Math.Sqrt(avg.GetAverage());

            Assert.True(rmse < 0.2);
        }
Example #4
        public void testRecommenderWithSyntheticData()
        {
            setUpSyntheticData();

            factorizer     = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);
            svdRecommender = new SVDRecommender(dataModel, factorizer);

            // A hold-out test would be better, but this is just a toy example, so we only check that the
            // factorization is close to the original matrix.
            IRunningAverage avg     = new FullRunningAverage();
            var             userIDs = dataModel.GetUserIDs();

            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating   = pref.GetValue();
                    double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
                    double err      = rating - estimate;
                    avg.AddDatum(err * err);
                }
            }

            double rmse = Math.Sqrt(avg.GetAverage());

            logger.Info("rmse: " + rmse);
            Assert.True(rmse < 0.2);
        }
Example #5
            public double Estimate(long itemID)
            {
                IRunningAverage average = new FullRunningAverage();

                double[] similarities = similarity.ItemSimilarities(itemID, toItemIDs);
                for (int i = 0; i < toItemIDs.Length; i++)
                {
                    long toItemID           = toItemIDs[i];
                    Tuple <long, long> pair = new Tuple <long, long>(toItemID, itemID);
                    if (rescorer != null && rescorer.IsFiltered(pair))
                    {
                        continue;
                    }
                    double estimate = similarities[i];
                    if (rescorer != null)
                    {
                        estimate = rescorer.Rescore(pair, estimate);
                    }
                    if (excludeItemIfNotSimilarToAll || !Double.IsNaN(estimate))
                    {
                        average.AddDatum(estimate);
                    }
                }
                double averageEstimate = average.GetAverage();

                return(averageEstimate == 0 ? Double.NaN : averageEstimate);
            }
Example #6
            public double averageRating(long itemID)
            {
                IPreferenceArray prefs = dataModel.GetPreferencesForItem(itemID);
                IRunningAverage  avg   = new FullRunningAverage();

                foreach (IPreference pref in prefs)
                {
                    avg.AddDatum(pref.GetValue());
                }
                return(avg.GetAverage());
            }
        public void toyExampleImplicit()
        {
            var observations = new double[4, 4] {
                { 5.0, 5.0, 2.0, 0 },
                { 2.0, 0, 3.0, 5.0 },
                { 0, 5.0, 0, 3.0 },
                { 3.0, 0, 0, 5.0 }
            };

            var preferences = new double[4, 4] {
                { 1.0, 1.0, 1.0, 0 },
                { 1.0, 0, 1.0, 1.0 },
                { 0, 1.0, 0, 1.0 },
                { 1.0, 0, 0, 1.0 }
            };

            double alpha = 20;

            ALSWRFactorizer factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 5, true, alpha);

            SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

            IRunningAverage avg = new FullRunningAverage();

            for (int sliceIdx = 0; sliceIdx < preferences.GetLength(0); sliceIdx++)
            {
                var slice = MatrixUtil.viewRow(preferences, sliceIdx);
                for (var eIndex = 0; eIndex < slice.Length; eIndex++)
                {
                    var  e      = slice[eIndex];
                    long userID = sliceIdx + 1;
                    long itemID = eIndex + 1;

                    if (!Double.IsNaN(e))
                    {
                        double pref     = e;
                        double estimate = svdRecommender.EstimatePreference(userID, itemID);

                        double confidence = 1 + alpha * observations[sliceIdx, eIndex];
                        double err        = confidence * (pref - estimate) * (pref - estimate);
                        avg.AddDatum(err);
                        Console.WriteLine("Comparing preference of user [{0}] towards item [{1}], was [{2}] with confidence [{3}] "
                                          + "estimate is [{4}]", sliceIdx, eIndex, pref, confidence, estimate);
                    }
                }
            }
            double rmse = Math.Sqrt(avg.GetAverage());

            Console.WriteLine("RMSE: {0}", rmse);

            Assert.True(rmse < 0.4);
        }
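The test above uses the standard implicit-feedback weighting: each squared error is scaled by a confidence of 1 + alpha * observation, so frequently observed interactions contribute more to the weighted RMSE. A small illustrative helper (the name is hypothetical, not part of the library) showing just that weighting:

// Hypothetical helper (not part of the library): confidence-weighted squared
// error as used in the implicit-feedback toy example above.
static double WeightedSquaredError(double preference, double estimate,
                                   double observation, double alpha)
{
    double confidence = 1.0 + alpha * observation;   // c = 1 + alpha * observation
    double diff = preference - estimate;
    return confidence * diff * diff;
}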
        double getAveragePreference()
        {
            IRunningAverage average = new FullRunningAverage();
            var             it      = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(it.Current))
                {
                    average.AddDatum(pref.GetValue());
                }
            }
            return(average.GetAverage());
        }
Example #9
            public double Estimate(Item item)
            {
                RunningAverage average = new FullRunningAverage();

                foreach (User user in cluster)
                {
                    Preference pref = user.GetPreferenceFor(item.ID);
                    if (pref != null)
                    {
                        average.AddDatum(pref.Value);
                    }
                }
                return(average.Average);
            }
Example #10
        /**
         * {@inheritDoc}
         */
        public override void SetPreference(Object userID, Object itemID, double value)
        {
            DataModel dataModel = this.DataModel;
            double    prefDelta;

            try
            {
                User       theUser = dataModel.GetUser(userID);
                Preference oldPref = theUser.GetPreferenceFor(itemID);
                prefDelta = oldPref == null ? value : value - oldPref.Value;
            }
            catch (NoSuchElementException)
            {
                prefDelta = value;
            }
            base.SetPreference(userID, itemID, value);
            buildAveragesLock.AcquireWriterLock(Constants.INFINITE_TIMEOUT);
            try
            {
                RunningAverage itemAverage;
                if (!itemAverages.TryGetValue(itemID, out itemAverage) || itemAverage == null)
                {
                    RunningAverage newItemAverage = new FullRunningAverage();
                    newItemAverage.AddDatum(prefDelta);
                    itemAverages.Add(itemID, newItemAverage);
                }
                else
                {
                    itemAverage.ChangeDatum(prefDelta);
                }

                RunningAverage userAverage;

                if (!userAverages.TryGetValue(userID, out userAverage) || userAverage == null)
                {
                    RunningAverage newUserAverage = new FullRunningAverage();
                    newUserAverage.AddDatum(prefDelta);
                    userAverages.Add(userID, newUserAverage);
                }
                else
                {
                    userAverage.ChangeDatum(prefDelta);
                }
                overallAveragePrefValue.ChangeDatum(prefDelta);
            } finally {
                buildAveragesLock.ReleaseWriterLock();
            }
        }
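The example above guards the averages with the legacy ReaderWriterLock. In newer .NET code, ReaderWriterLockSlim is generally preferred; a sketch of the same item-average update under that type, assuming buildAveragesLock were declared as a ReaderWriterLockSlim field:

// Sketch only: the same item-average update guarded by ReaderWriterLockSlim
// (assumption: buildAveragesLock is a ReaderWriterLockSlim field).
buildAveragesLock.EnterWriteLock();
try
{
    RunningAverage itemAverage;
    if (!itemAverages.TryGetValue(itemID, out itemAverage) || itemAverage == null)
    {
        RunningAverage newItemAverage = new FullRunningAverage();
        newItemAverage.AddDatum(prefDelta);
        itemAverages.Add(itemID, newItemAverage);
    }
    else
    {
        itemAverage.ChangeDatum(prefDelta);
    }
}
finally
{
    buildAveragesLock.ExitWriteLock();
}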
Example #11
            public Double GetValue(User key)
            {
                RunningAverage average = new FullRunningAverage();

                Preference[] prefs = key.GetPreferencesAsArray();
                if (prefs.Length == 0)
                {
                    return(0.0);
                }

                foreach (Preference pref in prefs)
                {
                    average.AddDatum(pref.Value);
                }
                return(average.Average);
            }
Example #12
            public double Estimate(Item item)
            {
                RunningAverage average = new FullRunningAverage();

                foreach (Item toItem in toItems)
                {
                    Pair <Item, Item> pair = new Pair <Item, Item>(toItem, item);
                    if (rescorer.IsFiltered(pair))
                    {
                        continue;
                    }
                    double estimate = correlation.GetItemCorrelation(toItem, item);
                    estimate = rescorer.Rescore(pair, estimate);
                    average.AddDatum(estimate);
                }
                return(average.Average);
            }
            public float Get(long key)
            {
                IPreferenceArray prefs = inf.dataModel.GetPreferencesFromUser(key);
                int size = prefs.Length();

                if (size == 0)
                {
                    return(ZERO);
                }
                IRunningAverage average = new FullRunningAverage();

                for (int i = 0; i < size; i++)
                {
                    average.AddDatum(prefs.GetValue(i));
                }
                return((float)average.GetAverage());
            }
Example #14
        public override void SetPreference(Object userID, Object itemID, double value)
        {
            double prefDelta;

            try
            {
                User       theUser = this.DataModel.GetUser(userID);
                Preference oldPref = theUser.GetPreferenceFor(itemID);
                prefDelta = oldPref == null ? value : value - oldPref.Value;
            }
            catch
            {
                prefDelta = value;
            }
            base.SetPreference(userID, itemID, value);

            buildAveragesLock.AcquireWriterLock(Constants.INFINITE_TIMEOUT);
            try
            {
                RunningAverage average = null;
                if (!itemAverages.TryGetValue(itemID, out average))
                {
                    RunningAverage newAverage = new FullRunningAverage();
                    newAverage.AddDatum(prefDelta);
                    itemAverages.Add(itemID, newAverage);
                }
                else
                {
                    average.ChangeDatum(prefDelta);
                }
            }
            finally
            {
                buildAveragesLock.ReleaseWriterLock();
            }
        }
        public IRStatistics Evaluate(RecommenderBuilder recommenderBuilder,
                                     DataModel dataModel,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            if (recommenderBuilder == null)
            {
                throw new ArgumentNullException("recommenderBuilder is null");
            }
            if (dataModel == null)
            {
                throw new ArgumentNullException("dataModel is null");
            }
            if (at < 1)
            {
                throw new ArgumentException("at must be at least 1");
            }
            if (double.IsNaN(evaluationPercentage) || evaluationPercentage <= 0.0 || evaluationPercentage > 1.0)
            {
                throw new ArgumentException("Invalid evaluationPercentage: " + evaluationPercentage);
            }
            if (double.IsNaN(relevanceThreshold))
            {
                throw new ArgumentException("Invalid relevanceThreshold: " + evaluationPercentage);
            }

            RunningAverage precision = new FullRunningAverage();
            RunningAverage recall    = new FullRunningAverage();

            foreach (User user in dataModel.GetUsers())
            {
                Object id = user.ID;
                if (random.NextDouble() < evaluationPercentage)
                {
                    ICollection <Item> relevantItems = new HashedSet <Item>(/* at */);
                    Preference[]       prefs         = user.GetPreferencesAsArray();

                    foreach (Preference pref in prefs)
                    {
                        if (pref.Value >= relevanceThreshold)
                        {
                            relevantItems.Add(pref.Item);
                        }
                    }
                    int numRelevantItems = relevantItems.Count;
                    if (numRelevantItems > 0)
                    {
                        ICollection <User> trainingUsers = new List <User>(dataModel.GetNumUsers());
                        foreach (User user2 in dataModel.GetUsers())
                        {
                            if (id.Equals(user2.ID))
                            {
                                ICollection <Preference> trainingPrefs = new List <Preference>();
                                prefs = user2.GetPreferencesAsArray();
                                foreach (Preference pref in prefs)
                                {
                                    if (!relevantItems.Contains(pref.Item))
                                    {
                                        trainingPrefs.Add(pref);
                                    }
                                }
                                if (trainingPrefs.Count > 0)
                                {
                                    User trainingUser = new GenericUser <String>(id.ToString(), trainingPrefs);
                                    trainingUsers.Add(trainingUser);
                                }
                            }
                            else
                            {
                                trainingUsers.Add(user2);
                            }
                        }
                        DataModel   trainingModel = new GenericDataModel(trainingUsers);
                        Recommender recommender   = recommenderBuilder.BuildRecommender(trainingModel);

                        try
                        {
                            trainingModel.GetUser(id);
                        }
                        catch (NoSuchElementException)
                        {
                            continue;                             // Oops we excluded all prefs for the user -- just move on
                        }

                        int intersectionSize = 0;
                        foreach (RecommendedItem recommendedItem in recommender.Recommend(id, at))
                        {
                            if (relevantItems.Contains(recommendedItem.Item))
                            {
                                intersectionSize++;
                            }
                        }
                        precision.AddDatum((double)intersectionSize / (double)at);
                        recall.AddDatum((double)intersectionSize / (double)numRelevantItems);
                    }
                }
            }

            return(new IRStatisticsImpl(precision.Average, recall.Average));
        }
Example #16
        public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                                     IDataModelBuilder dataModelBuilder,
                                     IDataModel dataModel,
                                     IDRescorer rescorer,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
            //Preconditions.checkArgument(dataModel != null, "dataModel is null");
            //Preconditions.checkArgument(at >= 1, "at must be at least 1");
            //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

            int             numItems  = dataModel.GetNumItems();
            IRunningAverage precision = new FullRunningAverage();
            IRunningAverage recall    = new FullRunningAverage();
            IRunningAverage fallOut   = new FullRunningAverage();
            IRunningAverage nDCG      = new FullRunningAverage();
            int             numUsersRecommendedFor      = 0;
            int             numUsersWithRecommendations = 0;

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;

                if (random.nextDouble() >= evaluationPercentage)
                {
                    // Skipped
                    continue;
                }

                var stopWatch = new System.Diagnostics.Stopwatch();
                stopWatch.Start();

                IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

                // List some most-preferred items that would count as (most) "relevant" results
                double    theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
                FastIDSet relevantItemIDs       = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

                int numRelevantItems = relevantItemIDs.Count();
                if (numRelevantItems <= 0)
                {
                    continue;
                }

                FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
                var it2 = dataModel.GetUserIDs();
                while (it2.MoveNext())
                {
                    dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
                }

                IDataModel trainingModel = dataModelBuilder == null
                    ? new GenericDataModel(trainingUsers)
                    : dataModelBuilder.BuildDataModel(trainingUsers);
                try {
                    trainingModel.GetPreferencesFromUser(userID);
                } catch (NoSuchUserException) {
                    continue; // Oops, we excluded all prefs for the user -- just move on
                }

                int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
                if (size < 2 * at)
                {
                    // Really not enough prefs to meaningfully evaluate this user
                    continue;
                }

                IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

                int intersectionSize = 0;
                var recommendedItems = recommender.Recommend(userID, at, rescorer);
                foreach (IRecommendedItem recommendedItem in recommendedItems)
                {
                    if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
                    {
                        intersectionSize++;
                    }
                }

                int numRecommendedItems = recommendedItems.Count;

                // Precision
                if (numRecommendedItems > 0)
                {
                    precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
                }

                // Recall
                recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

                // Fall-out
                if (numRelevantItems < size)
                {
                    fallOut.AddDatum((double)(numRecommendedItems - intersectionSize)
                                     / (double)(numItems - numRelevantItems));
                }

                // nDCG
                // In computing, assume relevant IDs have relevance 1 and others 0
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int i = 0; i < numRecommendedItems; i++)
                {
                    IRecommendedItem item     = recommendedItems[i];
                    double           discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
                    if (relevantItemIDs.Contains(item.GetItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

                    // Ideally results would be ordered with all relevant ones first, so this theoretical
                    // ideal list starts with number of relevant items equal to the total number of relevant items
                    if (i < numRelevantItems)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.AddDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                numUsersRecommendedFor++;
                if (numRecommendedItems > 0)
                {
                    numUsersWithRecommendations++;
                }

                stopWatch.Stop();

                log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
                log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
                         precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                         (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
            }

            return(new IRStatisticsImpl(
                       precision.GetAverage(),
                       recall.GetAverage(),
                       fallOut.GetAverage(),
                       nDCG.GetAverage(),
                       (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
        }
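The nDCG loop above calls a log2 helper that is not shown in this snippet. A minimal sketch of what it presumably computes, a base-2 logarithm built on Math.Log, which yields the 1/log2(rank + 1) discount of the classical DCG formulation:

// Assumed helper (not shown in the snippet): base-2 logarithm used for the
// DCG position discount.
private static double log2(double value)
{
    return Math.Log(value) / Math.Log(2.0);
}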
Example #17
        public void testFactorizerWithWithSyntheticData()
        {
            setUpSyntheticData();

            var stopWatch = new System.Diagnostics.Stopwatch();

            stopWatch.Start();

            factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);

            Factorization factorization = factorizer.Factorize();

            stopWatch.Stop();
            long duration = stopWatch.ElapsedMilliseconds;

            // A hold-out test would be better, but this is just a toy example, so we only check that the
            // factorization is close to the original matrix.
            IRunningAverage    avg     = new FullRunningAverage();
            var                userIDs = dataModel.GetUserIDs();
            IEnumerator <long> itemIDs;

            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating     = pref.GetValue();
                    var    userVector = factorization.getUserFeatures(userID);
                    var    itemVector = factorization.getItemFeatures(pref.GetItemID());
                    double estimate   = vectorDot(userVector, itemVector);
                    double err        = rating - estimate;

                    avg.AddDatum(err * err);
                }
            }

            double sum = 0.0;

            userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long   userID         = userIDs.Current;
                var    userVector     = factorization.getUserFeatures(userID);
                double regularization = vectorDot(userVector, userVector);
                sum += regularization;
            }

            itemIDs = dataModel.GetItemIDs();
            while (itemIDs.MoveNext())
            {
                long   itemID         = itemIDs.Current;
                var    itemVector     = factorization.getItemFeatures(itemID);
                double regularization = vectorDot(itemVector, itemVector);
                sum += regularization;
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            double loss = avg.GetAverage() / 2 + lambda / 2 * sum;

            logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration + "ms");
            Assert.True(rmse < 0.2);
        }
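Both the RMSE accumulation and the regularization sum above rely on a vectorDot helper that is not included in this snippet. A minimal sketch under the assumption that the feature vectors are plain double arrays of equal length:

// Assumed helper (not shown in the snippet): dot product of two feature
// vectors represented as double[] of equal length.
static double vectorDot(double[] a, double[] b)
{
    double sum = 0.0;
    for (int i = 0; i < a.Length; i++)
    {
        sum += a[i] * b[i];
    }
    return sum;
}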