Beispiel #1
0
            /// <summary>
            /// Allocates the item matrix M and the user matrix U for the given factorizer.
            /// Each item row starts with the value of <c>averateRating(itemID)</c> (presumably the
            /// item's average rating) followed by random values in [0, 0.1); each user row is a
            /// freshly allocated array of <c>numFeatures</c> entries.
            /// </summary>
            /// <param name="factorizer">Supplies the data model, the feature count and the item index lookup.</param>
            public Features(ALSWRFactorizer factorizer)
            {
                dataModel   = factorizer.dataModel;
                numFeatures = factorizer.numFeatures;
                var rng = RandomUtils.getRandom();

                // One row per item; the row position comes from the factorizer's item index.
                M = new double[dataModel.GetNumItems()][];
                for (var items = dataModel.GetItemIDs(); items.MoveNext();)
                {
                    long id  = items.Current;
                    int  row = factorizer.itemIndex(id);

                    double[] itemFeatures = new double[numFeatures];
                    M[row]          = itemFeatures;
                    itemFeatures[0] = averateRating(id);

                    int column = 1;
                    while (column < numFeatures)
                    {
                        itemFeatures[column] = rng.nextDouble() * 0.1;
                        column++;
                    }
                }

                // One row per user, left at the array default (all zeros).
                int userCount = dataModel.GetNumUsers();
                U = new double[userCount][];
                for (int user = 0; user != U.Length; ++user)
                {
                    U[user] = new double[numFeatures];
                }
            }
Beispiel #2
0
        /// <summary>
        /// Recommends randomly drawn items for which the user has no stored preference, each paired
        /// with a value from <c>randomPref()</c>.
        /// </summary>
        /// <remarks>
        /// The same item may be drawn more than once. <paramref name="rescorer"/> is not consulted.
        /// Fewer than <paramref name="howMany"/> items are returned when the model reports no items,
        /// when the item iterator is empty, or once every item the model reports has been seen to
        /// carry a preference from this user (otherwise the draw loop could never finish).
        /// </remarks>
        /// <param name="userID">User to recommend for.</param>
        /// <param name="howMany">Desired number of recommendations.</param>
        /// <param name="rescorer">Ignored by this implementation.</param>
        /// <returns>The list of randomly chosen recommendations.</returns>
        public override IList <IRecommendedItem> Recommend(long userID, int howMany, IDRescorer rescorer)
        {
            IDataModel dataModel           = GetDataModel();
            int        numItems            = dataModel.GetNumItems();
            List <IRecommendedItem> result = new List <IRecommendedItem>(howMany);

            // Nothing to draw from: avoid asking the random source for a value in an empty range.
            if (numItems <= 0)
            {
                return(result);
            }

            // Distinct items that were drawn but already have a preference from this user.
            var alreadyRated = new HashSet <long>();

            while (result.Count < howMany)
            {
                long itemID;
                using (var it = dataModel.GetItemIDs())
                {
                    if (!it.MoveNext())
                    {
                        // The iterator is empty even though the model reported items; give up.
                        break;
                    }

                    var  skipNum    = random.nextInt(numItems);
                    bool positioned = true;
                    for (int i = 0; positioned && i < skipNum; i++)
                    {
                        positioned = it.MoveNext();
                    }
                    if (!positioned)
                    {
                        // Ran past the end: Current is undefined there, so draw again.
                        continue;
                    }
                    itemID = it.Current;
                }

                if (dataModel.GetPreferenceValue(userID, itemID) == null)
                {
                    result.Add(new GenericRecommendedItem(itemID, randomPref()));
                }
                else if (alreadyRated.Add(itemID) && alreadyRated.Count >= numItems)
                {
                    // Every item has a preference from this user; no candidate can ever be found.
                    break;
                }
            }
            return(result);
        }
 /// <summary>
 /// Collects every item ID from the data model and removes the IDs in
 /// <paramref name="preferredItemIDs"/>, leaving the candidate items.
 /// </summary>
 /// <param name="preferredItemIDs">Item IDs to exclude from the candidates.</param>
 /// <param name="dataModel">Model whose item IDs are enumerated.</param>
 /// <returns>The set of remaining item IDs.</returns>
 protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
   var candidates = new FastIDSet(dataModel.GetNumItems());
   for (var items = dataModel.GetItemIDs(); items.MoveNext();) {
     candidates.Add(items.Current);
   }
   candidates.RemoveAll(preferredItemIDs);
   return candidates;
 }
Beispiel #4
0
        /// <summary>
        /// Like the constructor taking only an <see cref="IItemSimilarity"/> and an
        /// <see cref="IDataModel"/>, but keeps only the specified number of similarities from the
        /// given data model. It keeps those with the highest similarity -- those that are therefore
        /// most important.
        /// </summary>
        /// <remarks>
        /// Thanks to tsmorton for suggesting this and providing part of the implementation.
        /// </remarks>
        /// <param name="otherSimilarity">Other <see cref="IItemSimilarity"/> to get similarities from.</param>
        /// <param name="dataModel">Data model to get items from.</param>
        /// <param name="maxToKeep">Maximum number of similarities to keep.</param>
        /// <exception cref="TasteException">If an error occurs while accessing the data model's items.</exception>
        public GenericItemSimilarity(IItemSimilarity otherSimilarity,
                                     IDataModel dataModel,
                                     int maxToKeep)
        {
            long[] allItemIDs = GenericUserSimilarity.longIteratorToList(dataModel.GetItemIDs());

            // Rank all pairwise similarities and retain only the top maxToKeep of them.
            var strongest = TopItems.GetTopItemItemSimilarities(
                maxToKeep,
                new DataModelSimilaritiesIterator(otherSimilarity, allItemIDs));

            initSimilarityMaps(strongest.GetEnumerator());
        }
        /// <summary>
        /// Builds the candidate set: all item IDs known to the data model minus the ones listed in
        /// <paramref name="preferredItemIDs"/>.
        /// </summary>
        /// <param name="preferredItemIDs">Item IDs that must not appear in the result.</param>
        /// <param name="dataModel">Source of the item IDs.</param>
        /// <returns>The set of item IDs left after the removal.</returns>
        protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
        {
            // Pre-size the set with the model's item count.
            FastIDSet remaining = new FastIDSet(dataModel.GetNumItems());

            var cursor = dataModel.GetItemIDs();
            bool hasItem = cursor.MoveNext();
            while (hasItem)
            {
                remaining.Add(cursor.Current);
                hasItem = cursor.MoveNext();
            }

            remaining.RemoveAll(preferredItemIDs);
            return remaining;
        }
        /// <summary>
        /// Returns the IDs of all items in the data model whose similarity to
        /// <paramref name="itemID"/>, as computed by <c>ItemSimilarity</c>, is not NaN.
        /// </summary>
        /// <param name="itemID">Item to compare every other item against.</param>
        /// <returns>Array of the matching item IDs.</returns>
        public virtual long[] AllSimilarItemIDs(long itemID)
        {
            var similarIDs = new FastIDSet();

            for (var candidates = dataModel.GetItemIDs(); candidates.MoveNext();)
            {
                long candidateID = candidates.Current;
                if (Double.IsNaN(ItemSimilarity(itemID, candidateID)))
                {
                    // NaN means no similarity is available for this pair.
                    continue;
                }
                similarIDs.Add(candidateID);
            }

            return similarIDs.ToArray();
        }
        /// <summary>
        /// Checks that the model enumerates exactly the expected item IDs, in order, and then ends.
        /// </summary>
        [Test] // the Java original declared: expected = NoSuchElementException.class
        public void testGetItems()
        {
            int[] expectedIDs = { 123, 234, 456, 654, 789, 999 };
            var   ids         = model.GetItemIDs();

            Assert.NotNull(ids);
            foreach (int expected in expectedIDs)
            {
                Assert.True(ids.MoveNext());
                Assert.AreEqual(expected, ids.Current);
            }
            Assert.False(ids.MoveNext());

            // Marked "exception" in the original port; nothing is asserted about this extra call.
            ids.MoveNext();
        }
    /// <summary>
    /// Sets up the feature matrices for the given factorizer: M gets one row per item, seeded with
    /// <c>averateRating(itemID)</c> in column 0 and random values in [0, 0.1) elsewhere; U gets one
    /// newly allocated row of <c>numFeatures</c> entries per user.
    /// </summary>
    /// <param name="factorizer">Provides the data model, the number of features and the item index.</param>
    public Features(ALSWRFactorizer factorizer) {
      dataModel = factorizer.dataModel;
      numFeatures = factorizer.numFeatures;
      var generator = RandomUtils.getRandom();

      M = new double[dataModel.GetNumItems()][];
      var itemCursor = dataModel.GetItemIDs();
      bool more = itemCursor.MoveNext();
      while (more) {
        long currentItem = itemCursor.Current;
        int position = factorizer.itemIndex(currentItem);
        M[position] = new double[numFeatures];
        // Column 0 carries the value from averateRating; the rest is small random noise.
        M[position][0] = averateRating(currentItem);
        for (int k = 1; k < numFeatures; ++k) {
          M[position][k] = generator.nextDouble() * 0.1;
        }
        more = itemCursor.MoveNext();
      }

      U = new double[dataModel.GetNumUsers()][];
      int userRow = 0;
      while (userRow < U.Length) {
        U[userRow] = new double[numFeatures];
        userRow++;
      }
    }
 /// <summary>
 /// Fills <c>userIDMapping</c> and <c>itemIDMapping</c> by passing the data model's user and
 /// item counts, together with the matching ID iterators, to <c>createIDMapping</c>.
 /// </summary>
 private void buildMappings()
 {
     var userCount = dataModel.GetNumUsers();
     var userIDs   = dataModel.GetUserIDs();
     userIDMapping = createIDMapping(userCount, userIDs);

     var itemCount = dataModel.GetNumItems();
     var itemIDs   = dataModel.GetItemIDs();
     itemIDMapping = createIDMapping(itemCount, itemIDs);
 }
Beispiel #10
0
        /// <summary>
        /// Computes the factorization by alternating, for <c>numIterations</c> rounds, between
        /// recomputing the user features with M fixed and recomputing the item features with U fixed.
        /// Each user row and each item row is solved in its own task.
        /// </summary>
        /// <returns>The factorization created from the final U and M matrices.</returns>
        /// <exception cref="AggregateException">Rethrown, after logging, when a feature task fails.</exception>
        public override Factorization Factorize()
        {
            log.Info("starting to compute the factorization...");
            Features features = new Features(this);

            // feature maps necessary for solving for implicit feedback
            IDictionary <int, double[]> userY = null;
            IDictionary <int, double[]> itemY = null;

            if (usesImplicitFeedback)
            {
                userY = userFeaturesMapping(dataModel.GetUserIDs(), dataModel.GetNumUsers(), features.getU());
                itemY = itemFeaturesMapping(dataModel.GetItemIDs(), dataModel.GetNumItems(), features.getM());
            }

            IList <Task> tasks;

            for (int iteration = 0; iteration < numIterations; iteration++)
            {
                log.Info("iteration {0}", iteration);

                // fix M - compute U
                tasks = new List <Task>();
                var userIDsIterator = dataModel.GetUserIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, itemY) : null;

                    while (userIDsIterator.MoveNext())
                    {
                        // Declared inside the loop so every task captures its own copies.
                        long             userID          = userIDsIterator.Current;
                        var              itemIDsFromUser = dataModel.GetItemIDsFromUser(userID).GetEnumerator();
                        IPreferenceArray userPrefs       = dataModel.GetPreferencesFromUser(userID);

                        tasks.Add(Task.Factory.StartNew(() => {
                            List <double[]> featureVectors = new List <double[]>();
                            while (itemIDsFromUser.MoveNext())
                            {
                                long itemID = itemIDsFromUser.Current;
                                featureVectors.Add(features.getItemFeatureColumn(itemIndex(itemID)));
                            }

                            var userFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseUserRatingVector(userPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);

                            features.setFeatureColumnInU(userIndex(userID), userFeatures);
                        }));
                    }
                } finally {
                    // Always wait for the tasks already started, even if scheduling failed part-way.
                    waitForFeatureTasks(tasks, dataModel.GetNumUsers(), "Error when computing user features");
                }

                // fix U - compute M
                tasks = new List <Task>();

                var itemIDsIterator = dataModel.GetItemIDs();
                try {
                    ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                        ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, userY) : null;

                    while (itemIDsIterator.MoveNext())
                    {
                        long             itemID    = itemIDsIterator.Current;
                        IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);

                        tasks.Add(Task.Factory.StartNew(() => {
                            var featureVectors = new List <double[]>();
                            foreach (IPreference pref in itemPrefs)
                            {
                                long userID = pref.GetUserID();
                                featureVectors.Add(features.getUserFeatureColumn(userIndex(userID)));
                            }

                            var itemFeatures = usesImplicitFeedback
                                ? implicitFeedbackSolver.solve(sparseItemRatingVector(itemPrefs))
                                : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);

                            features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
                        }));
                    }
                } finally {
                    waitForFeatureTasks(tasks, dataModel.GetNumItems(), "Error when computing item features");
                }
            }

            log.Info("finished computation of the factorization...");
            return(createFactorization(features.getU(), features.getM()));
        }

        /// <summary>
        /// Waits for the given tasks, allowing one second per entity, and logs and rethrows any
        /// task failure.
        /// </summary>
        /// <param name="tasks">Tasks started for the current half-iteration.</param>
        /// <param name="numEntities">Number of users or items; scales the timeout.</param>
        /// <param name="failureMessage">Message logged when a task has failed.</param>
        private void waitForFeatureTasks(IList <Task> tasks, int numEntities, string failureMessage)
        {
            // 1000 * numEntities overflows int for very large models; compute in long and clamp.
            int timeoutMillis = (int)Math.Min(int.MaxValue, 1000L * numEntities);

            try {
                // NOTE(review): a false return (timeout elapsed) is still ignored, as before.
                Task.WaitAll(tasks.ToArray(), timeoutMillis);
            } catch (AggregateException e) {
                log.Warn(failureMessage, e);
                // A bare throw preserves the original stack trace; "throw e" would reset it.
                throw;
            }
        }
Beispiel #11
0
 /// <summary>
 /// Builds a list of item-item similarities given an <see cref="IItemSimilarity"/> implementation
 /// and an <see cref="IDataModel"/>, rather than a list of precomputed item-item similarities.
 /// </summary>
 /// <remarks>
 /// It's valid to build a <c>GenericItemSimilarity</c> this way, but perhaps missing some of the
 /// point of an item-based recommender. Item-based recommenders use the assumption that item-item
 /// similarities are relatively fixed, and might be known already independent of user preferences.
 /// Hence it is useful to inject that information through the constructor that accepts the
 /// similarities directly.
 /// </remarks>
 /// <param name="otherSimilarity">Other <see cref="IItemSimilarity"/> to get similarities from.</param>
 /// <param name="dataModel">Data model to get items from.</param>
 /// <exception cref="TasteException">If an error occurs while accessing the data model's items.</exception>
 public GenericItemSimilarity(IItemSimilarity otherSimilarity, IDataModel dataModel)
 {
     long[] allItemIDs = GenericUserSimilarity.longIteratorToList(dataModel.GetItemIDs());

     // Feed every similarity derived from the data model's items into the maps.
     var pairwise = new DataModelSimilaritiesIterator(otherSimilarity, allItemIDs);
     initSimilarityMaps(pairwise);
 }
Beispiel #12
0
 /// <summary>
 /// Returns the item IDs reported by the wrapped <c>_delegate</c>.
 /// </summary>
 /// <returns>The delegate's item ID enumerator.</returns>
 public virtual IEnumerator <long> GetItemIDs()
 {
     // Items that only the plus-one user knows about are ignored here; that can't really happen.
     var delegateItemIDs = _delegate.GetItemIDs();
     return delegateItemIDs;
 }
Beispiel #13
0
        /// <summary>
        /// Factorizes the toy data set with <c>ParallelSGDFactorizer</c> and checks that the
        /// reconstruction error (RMSE over all stored preferences) stays below 0.2. The regularized
        /// loss and the elapsed time are only logged.
        /// </summary>
        public void testFactorizerWithToyData()
        {
            setUpToyData();

            var stopWatch = new System.Diagnostics.Stopwatch();

            stopWatch.Start();

            factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);

            Factorization factorization = factorizer.Factorize();

            stopWatch.Stop();
            long duration = stopWatch.ElapsedMilliseconds;

            // a hold out test would be better, but this is just a toy example so we only check that the
            // factorization is close to the original matrix
            IRunningAverage avg     = new FullRunningAverage();
            var             userIDs = dataModel.GetUserIDs();

            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating     = pref.GetValue();
                    var    userVector = factorization.getUserFeatures(userID);
                    var    itemVector = factorization.getItemFeatures(pref.GetItemID());
                    double estimate   = vectorDot(userVector, itemVector);

                    double err = rating - estimate;

                    avg.AddDatum(err * err);
                }
            }

            // Regularization term: squared norms of all user and all item feature vectors.
            double sum = 0.0;

            userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long   userID         = userIDs.Current;
                var    userVector     = factorization.getUserFeatures(userID);
                double regularization = vectorDot(userVector, userVector);
                sum += regularization;
            }

            var itemIDs = dataModel.GetItemIDs();
            while (itemIDs.MoveNext())
            {
                long   itemID         = itemIDs.Current;
                // Item IDs must be looked up in the item features, not the user features.
                var    itemVector     = factorization.getItemFeatures(itemID);
                double regularization = vectorDot(itemVector, itemVector);
                sum += regularization;
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            double loss = avg.GetAverage() / 2 + lambda / 2 * sum;

            logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration);
            Assert.True(rmse < 0.2);
        }
 /// <summary>
 /// Forwards to the wrapped <c>_delegate</c> and returns its item ID enumerator unchanged.
 /// </summary>
 /// <returns>The delegate's item ID enumerator.</returns>
 public override IEnumerator <long> GetItemIDs()
 {
     return _delegate.GetItemIDs();
 }