public virtual double Evaluate(IRecommenderBuilder recommenderBuilder,
                                       IDataModelBuilder dataModelBuilder,
                                       IDataModel dataModel,
                                       double trainingPercentage,
                                       double evaluationPercentage)
        {
            //Preconditions.checkNotNull(recommenderBuilder);
            //Preconditions.checkNotNull(dataModel);
            //Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
            //  "Invalid trainingPercentage: " + trainingPercentage + ". Must be: 0.0 <= trainingPercentage <= 1.0");
            //Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
            //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 <= evaluationPercentage <= 1.0");

            log.Info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);

            int numUsers = dataModel.GetNumUsers();
            FastByIDMap <IPreferenceArray> trainingPrefs = new FastByIDMap <IPreferenceArray>(
                1 + (int)(evaluationPercentage * numUsers));
            FastByIDMap <IPreferenceArray> testPrefs = new FastByIDMap <IPreferenceArray>(
                1 + (int)(evaluationPercentage * numUsers));

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;
                if (random.nextDouble() < evaluationPercentage)
                {
                    splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
                }
            }

            IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingPrefs)
        : dataModelBuilder.BuildDataModel(trainingPrefs);

            IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

            double result = getEvaluation(testPrefs, recommender);

            log.Info("Evaluation result: {}", result);
            return(result);
        }
  public virtual double Evaluate(IRecommenderBuilder recommenderBuilder,
                         IDataModelBuilder dataModelBuilder,
                         IDataModel dataModel,
                         double trainingPercentage,
                         double evaluationPercentage) {
    //Preconditions.checkNotNull(recommenderBuilder);
    //Preconditions.checkNotNull(dataModel);
    //Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
    //  "Invalid trainingPercentage: " + trainingPercentage + ". Must be: 0.0 <= trainingPercentage <= 1.0");
    //Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
    //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 <= evaluationPercentage <= 1.0");

    log.Info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);
    
    int numUsers = dataModel.GetNumUsers();
    FastByIDMap<IPreferenceArray> trainingPrefs = new FastByIDMap<IPreferenceArray>(
        1 + (int) (evaluationPercentage * numUsers));
    FastByIDMap<IPreferenceArray> testPrefs = new FastByIDMap<IPreferenceArray>(
        1 + (int) (evaluationPercentage * numUsers));
    
    var it = dataModel.GetUserIDs();
    while (it.MoveNext()) {
      long userID = it.Current;
      if (random.nextDouble() < evaluationPercentage) {
        splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
      }
    }
    
    IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingPrefs)
        : dataModelBuilder.BuildDataModel(trainingPrefs);
    
    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);
    
    double result = getEvaluation(testPrefs, recommender);
    log.Info("Evaluation result: {}", result);
    return result;
  }
コード例 #3
0
  public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                               IDataModelBuilder dataModelBuilder,
                               IDataModel dataModel,
                               IDRescorer rescorer,
                               int at,
                               double relevanceThreshold,
                               double evaluationPercentage) {

    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    var it = dataModel.GetUserIDs();
    while (it.MoveNext()) {

      long userID = it.Current;

      if (random.nextDouble() >= evaluationPercentage) {
        // Skipped
        continue;
      }

	  var stopWatch = new System.Diagnostics.Stopwatch();
	  stopWatch.Start();

      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

      // List some most-preferred items that would count as (most) "relevant" results
      double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
      FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

      int numRelevantItems = relevantItemIDs.Count();
      if (numRelevantItems <= 0) {
        continue;
      }

      FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
      var it2 = dataModel.GetUserIDs();
      while (it2.MoveNext()) {
        dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
      }

      IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
      try {
        trainingModel.GetPreferencesFromUser(userID);
      } catch (NoSuchUserException nsee) {
        continue; // Oops we excluded all prefs for the user -- just move on
      }

      int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
      if (size < 2 * at) {
        // Really not enough prefs to meaningfully evaluate this user
        continue;
      }

      IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

      int intersectionSize = 0;
      var recommendedItems = recommender.Recommend(userID, at, rescorer);
      foreach (IRecommendedItem recommendedItem in recommendedItems) {
        if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
          intersectionSize++;
        }
      }

      int numRecommendedItems = recommendedItems.Count;

      // Precision
      if (numRecommendedItems > 0) {
        precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
      }

      // Recall
      recall.AddDatum((double) intersectionSize / (double) numRelevantItems);

      // Fall-out
      if (numRelevantItems < size) {
        fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                         / (double) (numItems - numRelevantItems));
      }

      // nDCG
      // In computing, assume relevant IDs have relevance 1 and others 0
      double cumulativeGain = 0.0;
      double idealizedGain = 0.0;
      for (int i = 0; i < numRecommendedItems; i++) {
        IRecommendedItem item = recommendedItems[i];
        double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
        if (relevantItemIDs.Contains(item.GetItemID())) {
          cumulativeGain += discount;
        }
        // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

        // Ideally results would be ordered with all relevant ones first, so this theoretical
        // ideal list starts with number of relevant items equal to the total number of relevant items
        if (i < numRelevantItems) {
          idealizedGain += discount;
        }
      }
      if (idealizedGain > 0.0) {
        nDCG.AddDatum(cumulativeGain / idealizedGain);
      }

      // Reach
      numUsersRecommendedFor++;
      if (numRecommendedItems > 0) {
        numUsersWithRecommendations++;
      }

	  stopWatch.Stop();

      log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
      log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
               precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
               (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
    }

    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }
コード例 #4
0
        public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                                     IDataModelBuilder dataModelBuilder,
                                     IDataModel dataModel,
                                     IDRescorer rescorer,
                                     int at,
                                     double relevanceThreshold,
                                     double evaluationPercentage)
        {
            //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
            //Preconditions.checkArgument(dataModel != null, "dataModel is null");
            //Preconditions.checkArgument(at >= 1, "at must be at least 1");
            //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

            int             numItems  = dataModel.GetNumItems();
            IRunningAverage precision = new FullRunningAverage();
            IRunningAverage recall    = new FullRunningAverage();
            IRunningAverage fallOut   = new FullRunningAverage();
            IRunningAverage nDCG      = new FullRunningAverage();
            int             numUsersRecommendedFor      = 0;
            int             numUsersWithRecommendations = 0;

            var it = dataModel.GetUserIDs();

            while (it.MoveNext())
            {
                long userID = it.Current;

                if (random.nextDouble() >= evaluationPercentage)
                {
                    // Skipped
                    continue;
                }

                var stopWatch = new System.Diagnostics.Stopwatch();
                stopWatch.Start();

                IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

                // List some most-preferred items that would count as (most) "relevant" results
                double    theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
                FastIDSet relevantItemIDs       = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

                int numRelevantItems = relevantItemIDs.Count();
                if (numRelevantItems <= 0)
                {
                    continue;
                }

                FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
                var it2 = dataModel.GetUserIDs();
                while (it2.MoveNext())
                {
                    dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
                }

                IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
          : dataModelBuilder.BuildDataModel(trainingUsers);
                try {
                    trainingModel.GetPreferencesFromUser(userID);
                } catch (NoSuchUserException nsee) {
                    continue; // Oops we excluded all prefs for the user -- just move on
                }

                int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
                if (size < 2 * at)
                {
                    // Really not enough prefs to meaningfully evaluate this user
                    continue;
                }

                IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

                int intersectionSize = 0;
                var recommendedItems = recommender.Recommend(userID, at, rescorer);
                foreach (IRecommendedItem recommendedItem in recommendedItems)
                {
                    if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
                    {
                        intersectionSize++;
                    }
                }

                int numRecommendedItems = recommendedItems.Count;

                // Precision
                if (numRecommendedItems > 0)
                {
                    precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
                }

                // Recall
                recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

                // Fall-out
                if (numRelevantItems < size)
                {
                    fallOut.AddDatum((double)(numRecommendedItems - intersectionSize)
                                     / (double)(numItems - numRelevantItems));
                }

                // nDCG
                // In computing, assume relevant IDs have relevance 1 and others 0
                double cumulativeGain = 0.0;
                double idealizedGain  = 0.0;
                for (int i = 0; i < numRecommendedItems; i++)
                {
                    IRecommendedItem item     = recommendedItems[i];
                    double           discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
                    if (relevantItemIDs.Contains(item.GetItemID()))
                    {
                        cumulativeGain += discount;
                    }
                    // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

                    // Ideally results would be ordered with all relevant ones first, so this theoretical
                    // ideal list starts with number of relevant items equal to the total number of relevant items
                    if (i < numRelevantItems)
                    {
                        idealizedGain += discount;
                    }
                }
                if (idealizedGain > 0.0)
                {
                    nDCG.AddDatum(cumulativeGain / idealizedGain);
                }

                // Reach
                numUsersRecommendedFor++;
                if (numRecommendedItems > 0)
                {
                    numUsersWithRecommendations++;
                }

                stopWatch.Stop();

                log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
                log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
                         precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                         (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
            }

            return(new IRStatisticsImpl(
                       precision.GetAverage(),
                       recall.GetAverage(),
                       fallOut.GetAverage(),
                       nDCG.GetAverage(),
                       (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
        }