public void testFull()
        {
            IRunningAverage runningAverage = new FullRunningAverage();

            // Helper that checks both the datum count and the mean after each mutation.
            Action<int, double> assertState = (expectedCount, expectedAverage) =>
            {
                Assert.AreEqual(expectedCount, runningAverage.GetCount());
                Assert.AreEqual(expectedAverage, runningAverage.GetAverage(), EPSILON);
            };

            // An empty average has no data and an undefined (NaN) mean.
            Assert.AreEqual(0, runningAverage.GetCount());
            Assert.True(Double.IsNaN(runningAverage.GetAverage()));

            runningAverage.AddDatum(1.0);
            assertState(1, 1.0);
            runningAverage.AddDatum(1.0);
            assertState(2, 1.0);
            runningAverage.AddDatum(4.0);
            assertState(3, 2.0);
            runningAverage.AddDatum(-4.0);
            assertState(4, 0.5);

            // Removing data walks the state back through the earlier averages.
            runningAverage.RemoveDatum(-4.0);
            assertState(3, 2.0);
            runningAverage.RemoveDatum(4.0);
            assertState(2, 1.0);

            // ChangeDatum keeps the count fixed while shifting the mean.
            runningAverage.ChangeDatum(0.0);
            assertState(2, 1.0);
            runningAverage.ChangeDatum(2.0);
            assertState(2, 2.0);
        }
        public void testFull()
        {
            IRunningAverage runningAverage = new FullRunningAverage();

            Assert.AreEqual(0, runningAverage.GetCount());
            Assert.True(Double.IsNaN(runningAverage.GetAverage()));
            runningAverage.AddDatum(1.0);
            Assert.AreEqual(1, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);
            runningAverage.AddDatum(1.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);
            runningAverage.AddDatum(4.0);
            Assert.AreEqual(3, runningAverage.GetCount());
            Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);
            runningAverage.AddDatum(-4.0);
            Assert.AreEqual(4, runningAverage.GetCount());
            Assert.AreEqual(0.5, runningAverage.GetAverage(), EPSILON);

            runningAverage.RemoveDatum(-4.0);
            Assert.AreEqual(3, runningAverage.GetCount());
            Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);
            runningAverage.RemoveDatum(4.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

            runningAverage.ChangeDatum(0.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);
            runningAverage.ChangeDatum(2.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);
        }
        public void testCopyConstructor()
        {
            IRunningAverage runningAverage = new FullRunningAverage();

            runningAverage.AddDatum(1.0);
            runningAverage.AddDatum(1.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

            IRunningAverage copy = new FullRunningAverage(runningAverage.GetCount(), runningAverage.GetAverage());
            Assert.AreEqual(2, copy.GetCount());
            Assert.AreEqual(1.0, copy.GetAverage(), EPSILON);
        }
        public void testCopyConstructor()
        {
            IRunningAverage runningAverage = new FullRunningAverage();

            runningAverage.AddDatum(1.0);
            runningAverage.AddDatum(1.0);
            Assert.AreEqual(2, runningAverage.GetCount());
            Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

            IRunningAverage copy = new FullRunningAverage(runningAverage.GetCount(), runningAverage.GetAverage());

            Assert.AreEqual(2, copy.GetCount());
            Assert.AreEqual(1.0, copy.GetAverage(), EPSILON);
        }
 double getAveragePreference() {
   // Mean of every preference value across all users in the data model.
   // NOTE(review): presumably returns NaN when the model holds no preferences
   // (FullRunningAverage with no data) — confirm against callers.
   IRunningAverage overall = new FullRunningAverage();
   var userIterator = dataModel.GetUserIDs();
   while (userIterator.MoveNext()) {
     var userPrefs = dataModel.GetPreferencesFromUser(userIterator.Current);
     foreach (IPreference preference in userPrefs) {
       overall.AddDatum(preference.GetValue());
     }
   }
   return overall.GetAverage();
 }
        public void toyExampleImplicit()
        {
            var observations = new double[4,4] {
            { 5.0, 5.0, 2.0, 0 },
            { 2.0, 0,   3.0, 5.0 },
            { 0,   5.0, 0,   3.0 },
            { 3.0, 0,   0,   5.0 } };

            var preferences = new double[4, 4] {
            { 1.0, 1.0, 1.0, 0 },
            { 1.0, 0,   1.0, 1.0 },
            { 0,   1.0, 0,   1.0 },
            { 1.0, 0,   0,   1.0 } };

            double alpha = 20;

            ALSWRFactorizer factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 5, true, alpha);

            SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

            IRunningAverage avg = new FullRunningAverage();
            for (int sliceIdx = 0; sliceIdx < preferences.GetLength(0); sliceIdx++) {
              var slice = MatrixUtil.viewRow(preferences, sliceIdx);
              for (var eIndex=0; eIndex<slice.Length; eIndex++) {
              var e = slice[eIndex];
              long userID = sliceIdx + 1;
              long itemID = eIndex + 1;

            if (!Double.IsNaN(e)) {
              double pref = e;
              double estimate = svdRecommender.EstimatePreference(userID, itemID);

              double confidence = 1 + alpha * observations[sliceIdx, eIndex];
              double err = confidence * (pref - estimate) * (pref - estimate);
              avg.AddDatum(err);
              Console.WriteLine("Comparing preference of user [{0}] towards item [{1}], was [{2}] with confidence [{3}] "
              + "estimate is [{4}]", sliceIdx, eIndex, pref, confidence, estimate);
            }
              }
            }
            double rmse = Math.Sqrt(avg.GetAverage());
            Console.WriteLine("RMSE: {0}", rmse);

            Assert.True(rmse < 0.4);
        }
        public void toyExample()
        {
            SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

               /// a hold out test would be better, but this is just a toy example so we only check that the
            /// factorization is close to the original matrix
            IRunningAverage avg = new FullRunningAverage();
            var userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext()) {
              long userID = userIDs.Current;
              foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
            double rating = pref.GetValue();
            double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
            double err = rating - estimate;
            avg.AddDatum(err * err);
              }
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            Assert.True(rmse < 0.2);
        }
  /// <summary>
  /// Runs an information-retrieval-style evaluation of a recommender. For a random
  /// sample of users (controlled by <paramref name="evaluationPercentage"/>), the
  /// user's most-relevant items are held out, a recommender is trained on the
  /// remaining data, and precision@N, recall, fall-out, nDCG and reach are
  /// accumulated across all evaluated users.
  /// </summary>
  /// <param name="recommenderBuilder">builds the recommender under test from a training model</param>
  /// <param name="dataModelBuilder">optional factory for the training model; a
  /// <c>GenericDataModel</c> is used when null</param>
  /// <param name="dataModel">full data model containing all known preferences</param>
  /// <param name="rescorer">optional rescorer applied when producing recommendations</param>
  /// <param name="at">number of recommendations requested per user (the N in precision@N)</param>
  /// <param name="relevanceThreshold">minimum preference for an item to count as relevant;
  /// NaN means compute a per-user threshold from that user's preferences</param>
  /// <param name="evaluationPercentage">fraction in (0, 1] of users to evaluate</param>
  /// <returns>aggregated IR statistics over all evaluated users</returns>
  public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                               IDataModelBuilder dataModelBuilder,
                               IDataModel dataModel,
                               IDRescorer rescorer,
                               int at,
                               double relevanceThreshold,
                               double evaluationPercentage) {

    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    var it = dataModel.GetUserIDs();
    while (it.MoveNext()) {

      long userID = it.Current;

      // Sample users: only roughly evaluationPercentage of them are evaluated.
      if (random.nextDouble() >= evaluationPercentage) {
        // Skipped
        continue;
      }

	  var stopWatch = new System.Diagnostics.Stopwatch();
	  stopWatch.Start();

      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

      // List some most-preferred items that would count as (most) "relevant" results
      double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
      FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

      int numRelevantItems = relevantItemIDs.Count();
      if (numRelevantItems <= 0) {
        continue; // nothing held out for this user — cannot evaluate
      }

      // Build the training set: every user's prefs, with this user's relevant
      // (held-out) items excluded by the splitter.
      FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
      var it2 = dataModel.GetUserIDs();
      while (it2.MoveNext()) {
        dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
      }

      IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
      // FIX: exception variable was declared but never used (compiler warning CS0168);
      // a typeless catch expresses the intent without the warning.
      try {
        trainingModel.GetPreferencesFromUser(userID);
      } catch (NoSuchUserException) {
        continue; // Oops we excluded all prefs for the user -- just move on
      }

      int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
      if (size < 2 * at) {
        // Really not enough prefs to meaningfully evaluate this user
        continue;
      }

      IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

      // Count how many of the top-N recommendations are among the held-out relevant items.
      int intersectionSize = 0;
      var recommendedItems = recommender.Recommend(userID, at, rescorer);
      foreach (IRecommendedItem recommendedItem in recommendedItems) {
        if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
          intersectionSize++;
        }
      }

      int numRecommendedItems = recommendedItems.Count;

      // Precision: fraction of recommended items that were relevant.
      if (numRecommendedItems > 0) {
        precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
      }

      // Recall: fraction of relevant items that were recommended.
      recall.AddDatum((double) intersectionSize / (double) numRelevantItems);

      // Fall-out: fraction of non-relevant items that were (wrongly) recommended.
      if (numRelevantItems < size) {
        fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                         / (double) (numItems - numRelevantItems));
      }

      // nDCG
      // In computing, assume relevant IDs have relevance 1 and others 0
      double cumulativeGain = 0.0;
      double idealizedGain = 0.0;
      for (int i = 0; i < numRecommendedItems; i++) {
        IRecommendedItem item = recommendedItems[i];
        double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
        if (relevantItemIDs.Contains(item.GetItemID())) {
          cumulativeGain += discount;
        }
        // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

        // Ideally results would be ordered with all relevant ones first, so this theoretical
        // ideal list starts with number of relevant items equal to the total number of relevant items
        if (i < numRelevantItems) {
          idealizedGain += discount;
        }
      }
      if (idealizedGain > 0.0) {
        nDCG.AddDatum(cumulativeGain / idealizedGain);
      }

      // Reach: how many users we could produce any recommendation for.
      numUsersRecommendedFor++;
      if (numRecommendedItems > 0) {
        numUsersWithRecommendations++;
      }

	  stopWatch.Stop();

      log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
      log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
               precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
               (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
    }

    // NOTE(review): if no user qualifies, the reach denominator is 0 and the running
    // averages are empty (NaN) — presumably acceptable for this evaluator; confirm.
    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }
 public double averateRating(long itemID) {
   // Mean rating given to a single item across all users who rated it.
   // NOTE(review): the name looks like a typo for "averageRating"; kept as-is
   // because renaming would break existing callers.
   IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);
   IRunningAverage mean = new FullRunningAverage();
   foreach (IPreference preference in itemPrefs) {
     mean.AddDatum(preference.GetValue());
   }
   return mean.GetAverage();
 }
        public void testRecommenderWithSyntheticData()
        {
            setUpSyntheticData();

            factorizer= new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);
            svdRecommender = new SVDRecommender(dataModel, factorizer);

            /// a hold out test would be better, but this is just a toy example so we only check that the
             /// factorization is close to the original matrix
            IRunningAverage avg = new FullRunningAverage();
            var userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext()) {
              long userID = userIDs.Current;
              foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
            double rating = pref.GetValue();
            double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
            double err = rating - estimate;
            avg.AddDatum(err * err);
              }
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            logger.Info("rmse: " + rmse);
            Assert.True(rmse < 0.2);
        }
        // Factorizes the synthetic data set directly and checks both the reconstruction
        // RMSE and (via logging) the regularized loss. Also times the factorization.
        // NOTE(review): the doubled "WithWith" in the name looks like a typo; kept so
        // test discovery/callers are unaffected.
        public void testFactorizerWithWithSyntheticData()
        {
            setUpSyntheticData();

            var stopWatch = new System.Diagnostics.Stopwatch();
            stopWatch.Start();

            factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);

            Factorization factorization = factorizer.Factorize();

            stopWatch.Stop();
            long duration = stopWatch.ElapsedMilliseconds;

            // A hold-out test would be better, but this is just a toy example so we only
            // check that the factorization is close to the original matrix.
            IRunningAverage avg = new FullRunningAverage();
            var userIDs = dataModel.GetUserIDs();
            IEnumerator<long> itemIDs;

            // Accumulate squared reconstruction error over every known preference.
            while (userIDs.MoveNext()) {
              long userID = userIDs.Current;
              foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
            double rating = pref.GetValue();
            var userVector = factorization.getUserFeatures(userID);
            var itemVector = factorization.getItemFeatures(pref.GetItemID());
            double estimate = vectorDot( userVector, itemVector);
            double err = rating - estimate;

            avg.AddDatum(err * err);
              }
            }

            // L2 regularization term: sum of squared norms of all user and item vectors.
            double sum = 0.0;

            userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext()) {
              long userID = userIDs.Current;
              var userVector = factorization.getUserFeatures(userID);
              double regularization = vectorDot( userVector, userVector);
              sum += regularization;
            }

            itemIDs = dataModel.GetItemIDs();
            while (itemIDs.MoveNext()) {
              long itemID = itemIDs.Current;
              // BUG FIX: this loop previously fetched *user* features for an item ID
              // (getUserFeatures(itemID)); the item's own feature vector must be used.
              var itemVector = factorization.getItemFeatures(itemID);
              double regularization = vectorDot( itemVector, itemVector);
              sum += regularization;
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            // Loss = MSE/2 + (lambda/2) * total squared feature norm.
            double loss = avg.GetAverage() / 2 + lambda / 2 * sum;
            logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration + "ms");
            Assert.True(rmse < 0.2);
        }