public void testFull() {
  IRunningAverage runningAverage = new FullRunningAverage();

  Assert.AreEqual(0, runningAverage.GetCount());
  Assert.True(Double.IsNaN(runningAverage.GetAverage()));

  runningAverage.AddDatum(1.0);
  Assert.AreEqual(1, runningAverage.GetCount());
  Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.AddDatum(1.0);
  Assert.AreEqual(2, runningAverage.GetCount());
  Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.AddDatum(4.0);
  Assert.AreEqual(3, runningAverage.GetCount());
  Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.AddDatum(-4.0);
  Assert.AreEqual(4, runningAverage.GetCount());
  Assert.AreEqual(0.5, runningAverage.GetAverage(), EPSILON);

  runningAverage.RemoveDatum(-4.0);
  Assert.AreEqual(3, runningAverage.GetCount());
  Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.RemoveDatum(4.0);
  Assert.AreEqual(2, runningAverage.GetCount());
  Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.ChangeDatum(0.0);
  Assert.AreEqual(2, runningAverage.GetCount());
  Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

  runningAverage.ChangeDatum(2.0);
  Assert.AreEqual(2, runningAverage.GetCount());
  Assert.AreEqual(2.0, runningAverage.GetAverage(), EPSILON);
}
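// For context: a minimal sketch of the kind of class these tests exercise, using the
// standard incremental-mean updates. This is an assumption-based illustration, not the
// library's actual FullRunningAverage source.
public class FullRunningAverageSketch {
  private int count = 0;
  private double average = Double.NaN;

  public FullRunningAverageSketch() { }

  // mirrors the (count, average) constructor used in testCopyConstructor below
  public FullRunningAverageSketch(int count, double average) {
    this.count = count;
    this.average = average;
  }

  public int GetCount() { return count; }
  public double GetAverage() { return average; }

  public void AddDatum(double datum) {
    // incremental mean: avg' = avg + (x - avg) / n
    if (++count == 1) {
      average = datum;
    } else {
      average += (datum - average) / count;
    }
  }

  public void RemoveDatum(double datum) {
    // inverse update: avg' = (n * avg - x) / (n - 1)
    if (--count == 0) {
      average = Double.NaN;
    } else {
      average = (average * (count + 1) - datum) / count;
    }
  }

  public void ChangeDatum(double delta) {
    // shifting one of n data points by delta shifts the mean by delta / n
    average += delta / count;
  }
}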
public void testCopyConstructor() {
  IRunningAverage runningAverage = new FullRunningAverage();

  runningAverage.AddDatum(1.0);
  runningAverage.AddDatum(1.0);
  Assert.AreEqual(2, runningAverage.GetCount());
  Assert.AreEqual(1.0, runningAverage.GetAverage(), EPSILON);

  IRunningAverage copy = new FullRunningAverage(runningAverage.GetCount(), runningAverage.GetAverage());
  Assert.AreEqual(2, copy.GetCount());
  Assert.AreEqual(1.0, copy.GetAverage(), EPSILON);
}
double getAveragePreference() {
  IRunningAverage average = new FullRunningAverage();
  var it = dataModel.GetUserIDs();
  while (it.MoveNext()) {
    foreach (IPreference pref in dataModel.GetPreferencesFromUser(it.Current)) {
      average.AddDatum(pref.GetValue());
    }
  }
  return average.GetAverage();
}
public void toyExampleImplicit() {
  var observations = new double[4, 4] {
      { 5.0, 5.0, 2.0, 0 },
      { 2.0, 0,   3.0, 5.0 },
      { 0,   5.0, 0,   3.0 },
      { 3.0, 0,   0,   5.0 } };

  var preferences = new double[4, 4] {
      { 1.0, 1.0, 1.0, 0 },
      { 1.0, 0,   1.0, 1.0 },
      { 0,   1.0, 0,   1.0 },
      { 1.0, 0,   0,   1.0 } };

  double alpha = 20;

  ALSWRFactorizer factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 5, true, alpha);
  SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

  IRunningAverage avg = new FullRunningAverage();
  for (int sliceIdx = 0; sliceIdx < preferences.GetLength(0); sliceIdx++) {
    var slice = MatrixUtil.viewRow(preferences, sliceIdx);
    for (var eIndex = 0; eIndex < slice.Length; eIndex++) {
      var e = slice[eIndex];
      long userID = sliceIdx + 1;
      long itemID = eIndex + 1;
      if (!Double.IsNaN(e)) {
        double pref = e;
        double estimate = svdRecommender.EstimatePreference(userID, itemID);
        // implicit-feedback weighting: confidence grows with the observed interaction strength
        double confidence = 1 + alpha * observations[sliceIdx, eIndex];
        double err = confidence * (pref - estimate) * (pref - estimate);
        avg.AddDatum(err);
        Console.WriteLine("Comparing preference of user [{0}] towards item [{1}], was [{2}] with confidence [{3}] "
            + "estimate is [{4}]", sliceIdx, eIndex, pref, confidence, estimate);
      }
    }
  }
  double rmse = Math.Sqrt(avg.GetAverage());
  Console.WriteLine("RMSE: {0}", rmse);
  Assert.True(rmse < 0.4);
}
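// The test above relies on MatrixUtil.viewRow to pull one row out of a rectangular
// array; a minimal sketch of what such a helper could look like, assuming it returns
// a copy of the row (hypothetical; the library's actual implementation may differ):
static double[] viewRow(double[,] matrix, int row) {
  var result = new double[matrix.GetLength(1)];
  for (int col = 0; col < result.Length; col++) {
    result[col] = matrix[row, col];
  }
  return result;
}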
public void toyExample() {
  SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);

  // a hold-out test would be better, but this is just a toy example, so we only check that the
  // factorization is close to the original matrix
  IRunningAverage avg = new FullRunningAverage();
  var userIDs = dataModel.GetUserIDs();
  while (userIDs.MoveNext()) {
    long userID = userIDs.Current;
    foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
      double rating = pref.GetValue();
      double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
      double err = rating - estimate;
      avg.AddDatum(err * err);
    }
  }
  double rmse = Math.Sqrt(avg.GetAverage());
  Assert.True(rmse < 0.2);
}
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                             IDataModelBuilder dataModelBuilder,
                             IDataModel dataModel,
                             IDRescorer rescorer,
                             int at,
                             double relevanceThreshold,
                             double evaluationPercentage) {
  //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
  //Preconditions.checkArgument(dataModel != null, "dataModel is null");
  //Preconditions.checkArgument(at >= 1, "at must be at least 1");
  //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
  //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

  int numItems = dataModel.GetNumItems();
  IRunningAverage precision = new FullRunningAverage();
  IRunningAverage recall = new FullRunningAverage();
  IRunningAverage fallOut = new FullRunningAverage();
  IRunningAverage nDCG = new FullRunningAverage();
  int numUsersRecommendedFor = 0;
  int numUsersWithRecommendations = 0;

  var it = dataModel.GetUserIDs();
  while (it.MoveNext()) {
    long userID = it.Current;

    if (random.nextDouble() >= evaluationPercentage) {
      // Skipped
      continue;
    }

    var stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start();

    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

    // List some most-preferred items that would count as (most) "relevant" results
    double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
    FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
    int numRelevantItems = relevantItemIDs.Count();
    if (numRelevantItems <= 0) {
      continue;
    }

    FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
    var it2 = dataModel.GetUserIDs();
    while (it2.MoveNext()) {
      dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
    }

    IDataModel trainingModel = dataModelBuilder == null
        ? new GenericDataModel(trainingUsers)
        : dataModelBuilder.BuildDataModel(trainingUsers);
    try {
      trainingModel.GetPreferencesFromUser(userID);
    } catch (NoSuchUserException) {
      continue; // Oops we excluded all prefs for the user -- just move on
    }

    int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
    if (size < 2 * at) {
      // Really not enough prefs to meaningfully evaluate this user
      continue;
    }

    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

    int intersectionSize = 0;
    var recommendedItems = recommender.Recommend(userID, at, rescorer);
    foreach (IRecommendedItem recommendedItem in recommendedItems) {
      if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
        intersectionSize++;
      }
    }

    int numRecommendedItems = recommendedItems.Count;

    // Precision
    if (numRecommendedItems > 0) {
      precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
    }

    // Recall
    recall.AddDatum((double) intersectionSize / (double) numRelevantItems);

    // Fall-out
    if (numRelevantItems < size) {
      fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                       / (double) (numItems - numRelevantItems));
    }

    // nDCG
    // In computing, assume relevant IDs have relevance 1 and others 0
    double cumulativeGain = 0.0;
    double idealizedGain = 0.0;
    for (int i = 0; i < numRecommendedItems; i++) {
      IRecommendedItem item = recommendedItems[i];
      double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
      if (relevantItemIDs.Contains(item.GetItemID())) {
        cumulativeGain += discount;
      }
      // otherwise we're multiplying discount by relevance 0, so it doesn't do anything

      // Ideally results would be ordered with all relevant ones first, so this theoretical
      // ideal list starts with a number of relevant items equal to the total number of relevant items
      if (i < numRelevantItems) {
        idealizedGain += discount;
      }
    }
    if (idealizedGain > 0.0) {
      nDCG.AddDatum(cumulativeGain / idealizedGain);
    }

    // Reach
    numUsersRecommendedFor++;
    if (numRecommendedItems > 0) {
      numUsersWithRecommendations++;
    }

    stopWatch.Stop();
    log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
    log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
        precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }

  return new IRStatisticsImpl(
      precision.GetAverage(),
      recall.GetAverage(),
      fallOut.GetAverage(),
      nDCG.GetAverage(),
      (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
}
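// The nDCG block above calls a log2 helper; a minimal sketch, assuming the usual
// change-of-base definition (hypothetical; the class's actual helper may differ):
private static double log2(double value) {
  return Math.Log(value) / Math.Log(2.0);
}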
public double averageRating(long itemID) {
  IPreferenceArray prefs = dataModel.GetPreferencesForItem(itemID);
  IRunningAverage avg = new FullRunningAverage();
  foreach (IPreference pref in prefs) {
    avg.AddDatum(pref.GetValue());
  }
  return avg.GetAverage();
}
public void testRecommenderWithSyntheticData() {
  setUpSyntheticData();

  factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);
  svdRecommender = new SVDRecommender(dataModel, factorizer);

  // a hold-out test would be better, but this is just a toy example, so we only check that the
  // factorization is close to the original matrix
  IRunningAverage avg = new FullRunningAverage();
  var userIDs = dataModel.GetUserIDs();
  while (userIDs.MoveNext()) {
    long userID = userIDs.Current;
    foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
      double rating = pref.GetValue();
      double estimate = svdRecommender.EstimatePreference(userID, pref.GetItemID());
      double err = rating - estimate;
      avg.AddDatum(err * err);
    }
  }
  double rmse = Math.Sqrt(avg.GetAverage());
  logger.Info("rmse: " + rmse);
  Assert.True(rmse < 0.2);
}
public void testFactorizerWithSyntheticData() {
  setUpSyntheticData();

  var stopWatch = new System.Diagnostics.Stopwatch();
  stopWatch.Start();
  factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);
  Factorization factorization = factorizer.Factorize();
  stopWatch.Stop();
  long duration = stopWatch.ElapsedMilliseconds;

  // a hold-out test would be better, but this is just a toy example, so we only check that the
  // factorization is close to the original matrix
  IRunningAverage avg = new FullRunningAverage();
  var userIDs = dataModel.GetUserIDs();
  IEnumerator<long> itemIDs;
  while (userIDs.MoveNext()) {
    long userID = userIDs.Current;
    foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
      double rating = pref.GetValue();
      var userVector = factorization.getUserFeatures(userID);
      var itemVector = factorization.getItemFeatures(pref.GetItemID());
      double estimate = vectorDot(userVector, itemVector);
      double err = rating - estimate;
      avg.AddDatum(err * err);
    }
  }

  // L2 regularization term: sum of squared norms of all user and item feature vectors
  double sum = 0.0;
  userIDs = dataModel.GetUserIDs();
  while (userIDs.MoveNext()) {
    long userID = userIDs.Current;
    var userVector = factorization.getUserFeatures(userID);
    double regularization = vectorDot(userVector, userVector);
    sum += regularization;
  }
  itemIDs = dataModel.GetItemIDs();
  while (itemIDs.MoveNext()) {
    long itemID = itemIDs.Current;
    var itemVector = factorization.getItemFeatures(itemID);
    double regularization = vectorDot(itemVector, itemVector);
    sum += regularization;
  }

  double rmse = Math.Sqrt(avg.GetAverage());
  double loss = avg.GetAverage() / 2 + lambda / 2 * sum;
  logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration + "ms");
  Assert.True(rmse < 0.2);
}
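// Both SGD tests call a vectorDot helper; a minimal sketch, assuming a plain dot
// product over dense double arrays (hypothetical; the actual helper may operate on
// the library's own vector type):
static double vectorDot(double[] a, double[] b) {
  double sum = 0.0;
  for (int i = 0; i < a.Length; i++) {
    sum += a[i] * b[i];
  }
  return sum;
}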