/// <summary>Similarity derived solely from the distance between each user's first
/// preference value: 1 / (1 + |v1 - v2|), so identical values give 1.0.</summary>
public double UserSimilarity(long userID1, long userID2) {
  IDataModel model = getDataModel();
  float first1 = model.GetPreferencesFromUser(userID1).Get(0).GetValue();
  float first2 = model.GetPreferencesFromUser(userID2).Get(0).GetValue();
  return 1.0 / (1.0 + Math.Abs(first1 - first2));
}
public void ratingVector() {
  // Convert user 1's preference array to a plain rating vector ...
  IPreferenceArray prefs = dataModel.GetPreferencesFromUser(1);
  double[] ratingVector = ALSWRFactorizer.ratingVector(prefs);
  // ... and verify it mirrors the array entry by entry.
  Assert.AreEqual(prefs.Length(), ratingVector.Length);
  for (int i = 0; i < 3; i++) {
    Assert.AreEqual(prefs.Get(i).GetValue(), ratingVector[i], EPSILON);
  }
}
/// <summary>Counts the total number of preferences across every user in the data model.</summary>
private int countPreferences() {
  int total = 0;
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    total += dataModel.GetPreferencesFromUser(users.Current).Length();
  }
  return total;
}
/// <summary>Scores, per user, how well the recommender's top items agree with the user's
/// actual best-rated items, feeding each per-user score into <paramref name="tracker"/>.</summary>
public static void Evaluate(IRecommender recommender, IDataModel model, int samples, IRunningAverage tracker, String tag) {
  printHeader();
  var userIterator = recommender.GetDataModel().GetUserIDs();
  while (userIterator.MoveNext()) {
    long userID = userIterator.Current;
    // Recommendations for the user vs. their actual preferences, best-first.
    var recommendations = recommender.Recommend(userID, model.GetNumItems());
    IPreferenceArray actualPrefs = model.GetPreferencesFromUser(userID);
    actualPrefs.SortByValueReversed();
    FastIDSet recommendedSet = new FastIDSet();
    long maxItemID = setBits(recommendedSet, recommendations, samples);
    FastIDSet actualSet = new FastIDSet();
    maxItemID = Math.Max(maxItemID, setBits(actualSet, actualPrefs, samples));
    // Intersect the two sets; fewer than two common items cannot be scored.
    int max = mask(recommendedSet, actualSet, maxItemID);
    max = Math.Min(max, samples);
    if (max < 2) {
      continue;
    }
    long[] items1 = getCommonItems(recommendedSet, recommendations, max);
    long[] items2 = getCommonItems(recommendedSet, actualPrefs, max);
    tracker.AddDatum(scoreCommonSubset(tag, userID, samples, max, items1, items2));
  }
}
/// <summary>Builds a running average of preference values per item, in one pass
/// over every user's preferences.</summary>
private void buildAverageDiffs() {
  // NOTE(review): locking on 'this' is an anti-pattern (callers can take the same
  // lock); kept here to preserve the original synchronization behavior, since a
  // private lock field cannot be introduced from within this method.
  lock (this) {
    IDataModel dataModel = GetDataModel();
    var users = dataModel.GetUserIDs();
    while (users.MoveNext()) {
      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(users.Current);
      int count = prefs.Length();
      for (int i = 0; i < count; i++) {
        long itemID = prefs.GetItemID(i);
        // Lazily create one running average per item.
        IRunningAverage average = itemAverages.Get(itemID);
        if (average == null) {
          average = new FullRunningAverage();
          itemAverages.Put(itemID, average);
        }
        average.AddDatum(prefs.GetValue(i));
      }
    }
  }
}
/// <summary>Adds another user's preferences to the training set. When the other user is
/// the user under evaluation, the held-out "relevant" items are stripped out first so the
/// recommender cannot train on them.</summary>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap <IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel) {
  IPreferenceArray otherPrefs = dataModel.GetPreferencesFromUser(otherUserID);
  if (userID != otherUserID) {
    // Not the evaluated user: keep every preference as-is.
    trainingUsers.Put(otherUserID, otherPrefs);
    return;
  }
  // Evaluated user: keep only preferences for items NOT in the relevant (test) set.
  List <IPreference> kept = new List <IPreference>(otherPrefs.Length());
  foreach (IPreference pref in otherPrefs) {
    if (!relevantItemIDs.Contains(pref.GetItemID())) {
      kept.Add(pref);
    }
  }
  // If every preference was relevant, the user is omitted from training entirely.
  if (kept.Count > 0) {
    trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
  }
}
/// <summary>Scans all preferences once to record the minimum and maximum preference
/// values, which bound the random estimates this recommender produces.</summary>
public RandomRecommender(IDataModel dataModel) : base(dataModel) {
  float max = float.NegativeInfinity;
  float min = float.PositiveInfinity;
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(users.Current);
    int count = prefs.Length();
    for (int i = 0; i < count; i++) {
      float value = prefs.GetValue(i);
      if (value < min) {
        min = value;
      }
      if (value > max) {
        max = value;
      }
    }
  }
  this.minPref = min;
  this.maxPref = max;
}
public void testPreferenceShufflerWithSyntheticData() {
  setUpSyntheticData();
  ParallelSGDFactorizer.PreferenceShuffler shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
  shuffler.shuffle();
  shuffler.stage();
  // Every shuffled preference must match the data model, and must appear only once.
  FastByIDMap <FastByIDMap <bool?> > seen = new FastByIDMap <FastByIDMap <bool?> >();
  for (int i = 0; i < shuffler.size(); i++) {
    IPreference pref = shuffler.get(i);
    float? modelValue = dataModel.GetPreferenceValue(pref.GetUserID(), pref.GetItemID());
    Assert.AreEqual(pref.GetValue(), modelValue.Value, 0.0);
    if (!seen.ContainsKey(pref.GetUserID())) {
      seen.Put(pref.GetUserID(), new FastByIDMap <bool?>());
    }
    // A duplicate would already have an entry here.
    Assert.IsNull(seen.Get(pref.GetUserID()).Get(pref.GetItemID()));
    seen.Get(pref.GetUserID()).Put(pref.GetItemID(), true);
  }
  // Conversely, every preference in the data model must have been produced.
  var userIDs = dataModel.GetUserIDs();
  int visited = 0;
  while (userIDs.MoveNext()) {
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userIDs.Current);
    foreach (IPreference preference in prefs) {
      Assert.True(seen.Get(preference.GetUserID()).Get(preference.GetItemID()).Value);
      visited++;
    }
  }
  Assert.AreEqual(visited, shuffler.size());
}
/// <summary>Exports the simple user IDs and preferences in the data model.</summary>
/// <returns>a <see cref="FastByIDMap"/> mapping user IDs to <see cref="IPreferenceArray"/>s representing that user's preferences</returns>
public static FastByIDMap <IPreferenceArray> ToDataMap(IDataModel dataModel) {
  FastByIDMap <IPreferenceArray> result = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    long id = users.Current;
    result.Put(id, dataModel.GetPreferencesFromUser(id));
  }
  return result;
}
/// <summary>Returns the temporary (anonymous) user's preferences when asked for
/// <c>TEMP_USER_ID</c>; otherwise defers to the wrapped data model.</summary>
/// <exception cref="NoSuchUserException">if the temp user is requested but no temp prefs are set</exception>
public virtual IPreferenceArray GetPreferencesFromUser(long userID) {
  if (userID != TEMP_USER_ID) {
    return _delegate.GetPreferencesFromUser(userID);
  }
  if (tempPrefs == null) {
    throw new NoSuchUserException(TEMP_USER_ID);
  }
  return tempPrefs;
}
/// <summary>Computes the mean of every preference value in the data model.</summary>
double getAveragePreference() {
  IRunningAverage avg = new FullRunningAverage();
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    foreach (IPreference pref in dataModel.GetPreferencesFromUser(users.Current)) {
      avg.AddDatum(pref.GetValue());
    }
  }
  return avg.GetAverage();
}
/// <summary>Collects the IDs of up to <paramref name="at"/> items the user rated at or
/// above <paramref name="relevanceThreshold"/>, taking the highest-rated items first.</summary>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel) {
  IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
  prefs.SortByValueReversed(); // best-rated items first
  FastIDSet relevant = new FastIDSet(at);
  int index = 0;
  while (index < prefs.Length() && relevant.Count() < at) {
    if (prefs.GetValue(index) >= relevanceThreshold) {
      relevant.Add(prefs.GetItemID(index));
    }
    index++;
  }
  return relevant;
}
/// <summary>Collects the IDs of up to <paramref name="at"/> items the user rated at or
/// above <paramref name="relevanceThreshold"/>, taking the highest-rated items first.</summary>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel) {
  IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
  FastIDSet relevantItemIDs = new FastIDSet(at);
  // Sort highest preference values first so the best-rated items are considered first.
  prefs.SortByValueReversed();
  for (int i = 0; i < prefs.Length() && relevantItemIDs.Count() < at; i++) {
    if (prefs.GetValue(i) >= relevanceThreshold) {
      relevantItemIDs.Add(prefs.GetItemID(i));
    }
  }
  return(relevantItemIDs);
}
/// <summary>Flattens every user's preferences into the <c>preferences</c> array cache.</summary>
private void cachePreferences(IDataModel dataModel) {
  // Size the array up front, then fill it in a single pass over all users.
  preferences = new IPreference[countPreferences(dataModel)];
  int next = 0;
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    foreach (IPreference preference in dataModel.GetPreferencesFromUser(users.Current)) {
      preferences[next++] = preference;
    }
  }
}
/// <summary>Lists the items among those the user has rated that score highest as the
/// "reason" for recommending <paramref name="itemID"/>.</summary>
public List <IRecommendedItem> RecommendedBecause(long userID, long itemID, int howMany) {
  //Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
  IDataModel model = GetDataModel();
  IPreferenceArray prefs = model.GetPreferencesFromUser(userID);
  // Candidate set: everything the user rated, minus the recommended item itself.
  int count = prefs.Length();
  FastIDSet candidates = new FastIDSet(count);
  for (int i = 0; i < count; i++) {
    candidates.Add(prefs.GetItemID(i));
  }
  candidates.Remove(itemID);
  TopItems.IEstimator <long> estimator = new RecommendedBecauseEstimator(this, userID, itemID);
  return TopItems.GetTopItems(howMany, candidates.GetEnumerator(), null, estimator);
}
/// <summary>Randomly splits one user's preferences into training and test partitions and
/// records the non-empty partitions under the user's ID.</summary>
private void splitOneUsersPrefs(double trainingPercentage, FastByIDMap <IPreferenceArray> trainingPrefs, FastByIDMap <IPreferenceArray> testPrefs, long userID, IDataModel dataModel) {
  List <IPreference> oneUserTrainingPrefs = null;
  List <IPreference> oneUserTestPrefs = null;
  IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
  int size = prefs.Length();
  for (int i = 0; i < size; i++) {
    IPreference newPref = new GenericPreference(userID, prefs.GetItemID(i), prefs.GetValue(i));
    // Each preference independently lands in training with probability trainingPercentage.
    if (random.nextDouble() < trainingPercentage) {
      if (oneUserTrainingPrefs == null) {
        oneUserTrainingPrefs = new List <IPreference>(3);
      }
      oneUserTrainingPrefs.Add(newPref);
    } else {
      if (oneUserTestPrefs == null) {
        oneUserTestPrefs = new List <IPreference>(3);
      }
      oneUserTestPrefs.Add(newPref);
    }
  }
  if (oneUserTrainingPrefs != null) {
    trainingPrefs.Put(userID, new GenericUserPreferenceArray(oneUserTrainingPrefs));
    // Test prefs are recorded only when the user also has training prefs: a user with
    // no training data is dropped from the evaluation entirely (test prefs discarded).
    if (oneUserTestPrefs != null) {
      testPrefs.Put(userID, new GenericUserPreferenceArray(oneUserTestPrefs));
    }
  }
}
/// <summary>Builds per-item and per-user running averages plus the overall average
/// preference value, in a single pass over the data model.</summary>
private void buildAverageDiffs() {
  // NOTE(review): locking on 'this' is discouraged, but kept to preserve the
  // original synchronization behavior (no private lock field is visible here).
  lock (this) {
    IDataModel dataModel = GetDataModel();
    var users = dataModel.GetUserIDs();
    while (users.MoveNext()) {
      long userID = users.Current;
      IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
      int count = prefs.Length();
      for (int i = 0; i < count; i++) {
        float value = prefs.GetValue(i);
        // The same value feeds the item average, the user average, and the overall average.
        addDatumAndCreateIfNeeded(prefs.GetItemID(i), value, itemAverages);
        addDatumAndCreateIfNeeded(userID, value, userAverages);
        overallAveragePrefValue.AddDatum(value);
      }
    }
  }
}
/// <summary>Adds <paramref name="otherUserID"/>'s preferences to the training set. When the
/// other user is the user under evaluation, the held-out "relevant" items are removed first
/// so the recommender cannot train on them.</summary>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel) {
  IPreferenceArray prefs2Array = dataModel.GetPreferencesFromUser(otherUserID);
  // If we're dealing with the very user that we're evaluating for precision/recall,
  if (userID == otherUserID) {
    // then must remove all the test IDs, the "relevant" item IDs
    List<IPreference> prefs2 = new List<IPreference>(prefs2Array.Length());
    foreach (IPreference pref in prefs2Array) {
      if (!relevantItemIDs.Contains(pref.GetItemID())) {
        prefs2.Add(pref);
      }
    }
    // Only record the user at all if some non-relevant preferences remain.
    if (prefs2.Count>0) {
      trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(prefs2));
    }
  } else {
    // otherwise just add all those other user's prefs
    trainingUsers.Put(otherUserID, prefs2Array);
  }
}
/// <summary>Delegates straight to the wrapped data model.</summary>
public override IPreferenceArray GetPreferencesFromUser(long userID) {
  return _delegate.GetPreferencesFromUser(userID);
}
/// <summary>Runs a precision/recall-style information-retrieval evaluation of the
/// recommender produced by <paramref name="recommenderBuilder"/>. For each sampled user,
/// that user's top-rated ("relevant") items are held out of a training model, the
/// recommender is rebuilt, and its top-<paramref name="at"/> recommendations are compared
/// against the held-out set.</summary>
/// <param name="recommenderBuilder">builds the recommender to evaluate from each training model</param>
/// <param name="dataModelBuilder">optional factory for the training model; a
/// <see cref="GenericDataModel"/> is used when null</param>
/// <param name="dataModel">the full data model containing all preferences</param>
/// <param name="rescorer">optional rescorer applied when recommending</param>
/// <param name="at">number of recommendations requested per user ("precision at N")</param>
/// <param name="relevanceThreshold">minimum value for an item to count as relevant; NaN
/// means compute a per-user threshold via <c>computeThreshold</c></param>
/// <param name="evaluationPercentage">fraction of users to evaluate, in (0, 1]</param>
/// <returns>precision, recall, fall-out, nDCG and reach averaged over evaluated users</returns>
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                             IDataModelBuilder dataModelBuilder,
                             IDataModel dataModel,
                             IDRescorer rescorer,
                             int at,
                             double relevanceThreshold,
                             double evaluationPercentage) {
  //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
  //Preconditions.checkArgument(dataModel != null, "dataModel is null");
  //Preconditions.checkArgument(at >= 1, "at must be at least 1");
  //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
  //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");
  int numItems = dataModel.GetNumItems();
  IRunningAverage precision = new FullRunningAverage();
  IRunningAverage recall = new FullRunningAverage();
  IRunningAverage fallOut = new FullRunningAverage();
  IRunningAverage nDCG = new FullRunningAverage();
  int numUsersRecommendedFor = 0;
  int numUsersWithRecommendations = 0;
  var it = dataModel.GetUserIDs();
  while (it.MoveNext()) {
    long userID = it.Current;
    // Sample only the requested fraction of users.
    if (random.nextDouble() >= evaluationPercentage) {
      // Skipped
      continue;
    }
    var stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start();
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
    // List some most-preferred items that would count as (most) "relevant" results
    double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
    FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
    int numRelevantItems = relevantItemIDs.Count();
    if (numRelevantItems <= 0) {
      continue;
    }
    // Build a training model that excludes this user's relevant (held-out) items.
    FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
    var it2 = dataModel.GetUserIDs();
    while (it2.MoveNext()) {
      dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
    }
    IDataModel trainingModel = dataModelBuilder == null ?
        new GenericDataModel(trainingUsers) :
        dataModelBuilder.BuildDataModel(trainingUsers);
    try {
      trainingModel.GetPreferencesFromUser(userID);
    } catch (NoSuchUserException nsee) {
      continue; // Oops we excluded all prefs for the user -- just move on
    }
    int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
    if (size < 2 * at) {
      // Really not enough prefs to meaningfully evaluate this user
      continue;
    }
    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);
    // Count how many recommended items were actually in the held-out relevant set.
    int intersectionSize = 0;
    var recommendedItems = recommender.Recommend(userID, at, rescorer);
    foreach (IRecommendedItem recommendedItem in recommendedItems) {
      if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
        intersectionSize++;
      }
    }
    int numRecommendedItems = recommendedItems.Count;
    // Precision
    if (numRecommendedItems > 0) {
      precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
    }
    // Recall
    recall.AddDatum((double) intersectionSize / (double) numRelevantItems);
    // Fall-out
    if (numRelevantItems < size) {
      fallOut.AddDatum((double) (numRecommendedItems - intersectionSize) / (double) (numItems - numRelevantItems));
    }
    // nDCG
    // In computing, assume relevant IDs have relevance 1 and others 0
    double cumulativeGain = 0.0;
    double idealizedGain = 0.0;
    for (int i = 0; i < numRecommendedItems; i++) {
      IRecommendedItem item = recommendedItems[i];
      double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
      if (relevantItemIDs.Contains(item.GetItemID())) {
        cumulativeGain += discount;
      }
      // otherwise we're multiplying discount by relevance 0 so it doesn't do anything
      // Ideally results would be ordered with all relevant ones first, so this theoretical
      // ideal list starts with number of relevant items equal to the total number of relevant items
      if (i < numRelevantItems) {
        idealizedGain += discount;
      }
    }
    if (idealizedGain > 0.0) {
      nDCG.AddDatum(cumulativeGain / idealizedGain);
    }
    // Reach
    numUsersRecommendedFor++;
    if (numRecommendedItems > 0) {
      numUsersWithRecommendations++;
    }
    stopWatch.Stop();
    log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
    log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
        precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
  }
  return new IRStatisticsImpl(
      precision.GetAverage(),
      recall.GetAverage(),
      fallOut.GetAverage(),
      nDCG.GetAverage(),
      (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
}
/// <summary>Randomly partitions one user's preferences into training and test sets and
/// records the non-empty partitions, keyed by user ID.</summary>
private void splitOneUsersPrefs(double trainingPercentage, FastByIDMap<IPreferenceArray> trainingPrefs, FastByIDMap<IPreferenceArray> testPrefs, long userID, IDataModel dataModel) {
  List<IPreference> training = null;
  List<IPreference> test = null;
  IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
  for (int i = 0; i < prefs.Length(); i++) {
    IPreference copy = new GenericPreference(userID, prefs.GetItemID(i), prefs.GetValue(i));
    // Each preference independently goes to training with probability trainingPercentage.
    if (random.nextDouble() < trainingPercentage) {
      if (training == null) {
        training = new List<IPreference>(3);
      }
      training.Add(copy);
    } else {
      if (test == null) {
        test = new List<IPreference>(3);
      }
      test.Add(copy);
    }
  }
  // A user with no training prefs is omitted entirely; their test prefs are discarded too.
  if (training != null) {
    trainingPrefs.Put(userID, new GenericUserPreferenceArray(training));
    if (test != null) {
      testPrefs.Put(userID, new GenericUserPreferenceArray(test));
    }
  }
}
/// <summary>Computes the similarity between two users from their preference values.
/// The two item-sorted preference arrays are merge-walked together; when an
/// <c>inferrer</c> is configured, items rated by only one user are also counted with the
/// missing rating inferred, otherwise only co-rated items contribute.</summary>
/// <returns>the (optionally centered and weight-normalized) similarity, or NaN when it
/// cannot be computed (e.g. a user has no preferences)</returns>
public double UserSimilarity(long userID1, long userID2) {
  IDataModel dataModel = getDataModel();
  IPreferenceArray xPrefs = dataModel.GetPreferencesFromUser(userID1);
  IPreferenceArray yPrefs = dataModel.GetPreferencesFromUser(userID2);
  int xLength = xPrefs.Length();
  int yLength = yPrefs.Length();
  if (xLength == 0 || yLength == 0) {
    return (Double.NaN);
  }
  // Merge-walk state: current item ID and index into each preference array.
  long xIndex = xPrefs.GetItemID(0);
  long yIndex = yPrefs.GetItemID(0);
  int xPrefIndex = 0;
  int yPrefIndex = 0;
  // Running sums used by computeResult (moments and squared differences).
  double sumX = 0.0;
  double sumX2 = 0.0;
  double sumY = 0.0;
  double sumY2 = 0.0;
  double sumXY = 0.0;
  double sumXYdiff2 = 0.0;
  int count = 0;
  bool hasInferrer = inferrer != null;
  while (true) {
    // compare < 0: x's item is smaller; > 0: y's is smaller; == 0: same item.
    int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
    if (hasInferrer || compare == 0) {
      double x;
      double y;
      if (xIndex == yIndex) {
        // Both users expressed a preference for the item
        x = xPrefs.GetValue(xPrefIndex);
        y = yPrefs.GetValue(yPrefIndex);
      } else {
        // Only one user expressed a preference, but infer the other one's preference and tally
        // as if the other user expressed that preference
        if (compare < 0) {
          // X has a value; infer Y's
          x = xPrefs.GetValue(xPrefIndex);
          y = inferrer.InferPreference(userID2, xIndex);
        } else {
          // compare > 0
          // Y has a value; infer X's
          x = inferrer.InferPreference(userID1, yIndex);
          y = yPrefs.GetValue(yPrefIndex);
        }
      }
      sumXY += x * y;
      sumX += x;
      sumX2 += x * x;
      sumY += y;
      sumY2 += y * y;
      double diff = x - y;
      sumXYdiff2 += diff * diff;
      count++;
    }
    // Advance whichever cursor(s) pointed at the smaller (or equal) item ID.
    if (compare <= 0) {
      if (++xPrefIndex >= xLength) {
        if (hasInferrer) {
          // Must count other Ys; pretend next X is far away
          if (yIndex == long.MaxValue) {
            // ... but stop if both are done!
            break;
          }
          xIndex = long.MaxValue;
        } else {
          break;
        }
      } else {
        xIndex = xPrefs.GetItemID(xPrefIndex);
      }
    }
    if (compare >= 0) {
      if (++yPrefIndex >= yLength) {
        if (hasInferrer) {
          // Must count other Xs; pretend next Y is far away
          if (xIndex == long.MaxValue) {
            // ... but stop if both are done!
            break;
          }
          yIndex = long.MaxValue;
        } else {
          break;
        }
      } else {
        yIndex = yPrefs.GetItemID(yPrefIndex);
      }
    }
  }
  // "Center" the data. If my math is correct, this'll do it.
  double result;
  if (centerData) {
    double meanX = sumX / count;
    double meanY = sumY / count;
    // double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
    double centeredSumXY = sumXY - meanY * sumX;
    // double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
    double centeredSumX2 = sumX2 - meanX * sumX;
    // double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
    double centeredSumY2 = sumY2 - meanY * sumY;
    result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
  } else {
    result = computeResult(count, sumXY, sumX2, sumY2, sumXYdiff2);
  }
  if (!Double.IsNaN(result)) {
    result = normalizeWeightResult(result, count, cachedNumItems);
  }
  return (result);
}
/// <summary>Spearman-style rank correlation between two users: preference values are
/// replaced by ranks over the items common to both users, then the rank differences feed
/// the formula 1 - 6*sum(d^2) / (n*(n^2-1)).</summary>
/// <returns>the rank correlation, or NaN when either user has fewer than two preferences
/// or fewer than two items are common to both</returns>
public double UserSimilarity(long userID1, long userID2) {
  IPreferenceArray xPrefs = dataModel.GetPreferencesFromUser(userID1);
  IPreferenceArray yPrefs = dataModel.GetPreferencesFromUser(userID2);
  int xLength = xPrefs.Length();
  int yLength = yPrefs.Length();
  if (xLength <= 1 || yLength <= 1) {
    return (Double.NaN);
  }
  // Copy prefs since we need to modify pref values to ranks
  xPrefs = xPrefs.Clone();
  yPrefs = yPrefs.Clone();
  // First sort by values from low to high
  xPrefs.SortByValue();
  yPrefs.SortByValue();
  // Assign ranks from low to high
  float nextRank = 1.0f;
  for (int i = 0; i < xLength; i++) {
    // ... but only for items that are common to both pref arrays
    if (yPrefs.HasPrefWithItemID(xPrefs.GetItemID(i))) {
      xPrefs.SetValue(i, nextRank);
      nextRank += 1.0f;
    }
    // Other values are bogus but don't matter
  }
  nextRank = 1.0f;
  for (int i = 0; i < yLength; i++) {
    if (xPrefs.HasPrefWithItemID(yPrefs.GetItemID(i))) {
      yPrefs.SetValue(i, nextRank);
      nextRank += 1.0f;
    }
  }
  // Re-sort by item so the two arrays can be merge-walked in step.
  xPrefs.SortByItem();
  yPrefs.SortByItem();
  long xIndex = xPrefs.GetItemID(0);
  long yIndex = yPrefs.GetItemID(0);
  int xPrefIndex = 0;
  int yPrefIndex = 0;
  double sumXYRankDiff2 = 0.0;
  int count = 0;
  while (true) {
    // compare < 0: x's item is smaller; > 0: y's is smaller; == 0: same item.
    int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
    if (compare == 0) {
      // Common item: accumulate the squared rank difference.
      double diff = xPrefs.GetValue(xPrefIndex) - yPrefs.GetValue(yPrefIndex);
      sumXYRankDiff2 += diff * diff;
      count++;
    }
    if (compare <= 0) {
      if (++xPrefIndex >= xLength) {
        break;
      }
      xIndex = xPrefs.GetItemID(xPrefIndex);
    }
    if (compare >= 0) {
      if (++yPrefIndex >= yLength) {
        break;
      }
      yIndex = yPrefs.GetItemID(yPrefIndex);
    }
  }
  if (count <= 1) {
    return (Double.NaN);
  }
  // When ranks are unique, this formula actually gives the Pearson correlation
  return (1.0 - 6.0 * sumXYRankDiff2 / (count * (count * count - 1)));
}
/// <summary>Runs the alternating-least-squares iterations: each iteration first solves all
/// user feature vectors with the item features fixed, then all item feature vectors with
/// the user features fixed, parallelizing the per-user/per-item solves with Tasks.</summary>
/// <returns>the learned user (U) and item (M) feature matrices wrapped in a Factorization</returns>
public override Factorization Factorize() {
  log.Info("starting to compute the factorization...");
  Features features = new Features(this);

  /// feature maps necessary for solving for implicit feedback
  IDictionary <int, double[]> userY = null;
  IDictionary <int, double[]> itemY = null;

  if (usesImplicitFeedback) {
    userY = userFeaturesMapping(dataModel.GetUserIDs(), dataModel.GetNumUsers(), features.getU());
    itemY = itemFeaturesMapping(dataModel.GetItemIDs(), dataModel.GetNumItems(), features.getM());
  }

  IList <Task> tasks;

  for (int iteration = 0; iteration < numIterations; iteration++) {
    log.Info("iteration {0}", iteration);

    /// fix M - compute U
    tasks = new List <Task>();
    var userIDsIterator = dataModel.GetUserIDs();
    try {
      ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback ?
          new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, itemY) : null;
      while (userIDsIterator.MoveNext()) {
        // Declared per loop iteration so each Task's closure captures its own copies.
        long userID = userIDsIterator.Current;
        var itemIDsFromUser = dataModel.GetItemIDsFromUser(userID).GetEnumerator();
        IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
        tasks.Add(Task.Factory.StartNew(() => {
          // Collect the feature columns of every item this user rated ...
          List <double[]> featureVectors = new List <double[]>();
          while (itemIDsFromUser.MoveNext()) {
            long itemID = itemIDsFromUser.Current;
            featureVectors.Add(features.getItemFeatureColumn(itemIndex(itemID)));
          }
          // ... then solve for this user's feature column and store it in U.
          var userFeatures = usesImplicitFeedback ?
              implicitFeedbackSolver.solve(sparseUserRatingVector(userPrefs)) :
              AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);
          features.setFeatureColumnInU(userIndex(userID), userFeatures);
        }));
      }
    } finally {
      // queue.shutdown();
      try {
        // NOTE(review): the timeout is in milliseconds, i.e. ~1 second per user;
        // confirm this (rather than seconds per user) is the intended budget.
        Task.WaitAll(tasks.ToArray(), 1000 * dataModel.GetNumUsers());
      } catch (AggregateException e) {
        log.Warn("Error when computing user features", e);
        // NOTE(review): 'throw e' resets the stack trace; 'throw;' would preserve it.
        throw e;
      }
    }

    /// fix U - compute M
    //queue = createQueue();
    tasks = new List <Task>();
    var itemIDsIterator = dataModel.GetItemIDs();
    try {
      ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback ?
          new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, userY) : null;
      while (itemIDsIterator.MoveNext()) {
        long itemID = itemIDsIterator.Current;
        IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);
        tasks.Add(Task.Factory.StartNew(() => {
          // Collect the feature columns of every user who rated this item ...
          var featureVectors = new List <double[]>();
          foreach (IPreference pref in itemPrefs) {
            long userID = pref.GetUserID();
            featureVectors.Add(features.getUserFeatureColumn(userIndex(userID)));
          }
          // ... then solve for this item's feature column and store it in M.
          var itemFeatures = usesImplicitFeedback ?
              implicitFeedbackSolver.solve(sparseItemRatingVector(itemPrefs)) :
              AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);
          features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
        }));
      }
    } finally {
      try {
        Task.WaitAll(tasks.ToArray(), 1000 * dataModel.GetNumItems());
        //queue.awaitTermination(dataModel.getNumItems(), TimeUnit.SECONDS);
      } catch (AggregateException e) {
        log.Warn("Error when computing item features", e);
        throw e;
      }
    }
  }

  log.Info("finished computation of the factorization...");
  return (createFactorization(features.getU(), features.getM()));
}
/// <summary>Exports the simple user IDs and preferences in the data model.</summary>
/// <param name="dataModel">the model whose preferences are exported</param>
/// <returns>a <see cref="FastByIDMap"/> mapping user IDs to <see cref="IPreferenceArray"/>s representing that user's preferences</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(IDataModel dataModel) {
  // GetNumUsers() is passed to the map constructor — presumably a capacity hint.
  FastByIDMap<IPreferenceArray> data = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
  var it = dataModel.GetUserIDs();
  while (it.MoveNext()) {
    long userID = it.Current;
    data.Put(userID, dataModel.GetPreferencesFromUser(userID));
  }
  return data;
}
/// <summary>Counts the total number of preferences held by all users of the given data model.</summary>
private int countPreferences(IDataModel dataModel) {
  int total = 0;
  var users = dataModel.GetUserIDs();
  while (users.MoveNext()) {
    total += dataModel.GetPreferencesFromUser(users.Current).Length();
  }
  return total;
}
/// <summary>Runs a precision/recall-style information-retrieval evaluation of the
/// recommender produced by <paramref name="recommenderBuilder"/>. For each sampled user,
/// that user's top-rated ("relevant") items are held out of a training model, the
/// recommender is rebuilt, and its top-<paramref name="at"/> recommendations are compared
/// against the held-out set.</summary>
/// <param name="recommenderBuilder">builds the recommender to evaluate from each training model</param>
/// <param name="dataModelBuilder">optional factory for the training model; a
/// <see cref="GenericDataModel"/> is used when null</param>
/// <param name="dataModel">the full data model containing all preferences</param>
/// <param name="rescorer">optional rescorer applied when recommending</param>
/// <param name="at">number of recommendations requested per user ("precision at N")</param>
/// <param name="relevanceThreshold">minimum value for an item to count as relevant; NaN
/// means compute a per-user threshold via <c>computeThreshold</c></param>
/// <param name="evaluationPercentage">fraction of users to evaluate, in (0, 1]</param>
/// <returns>precision, recall, fall-out, nDCG and reach averaged over evaluated users</returns>
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                             IDataModelBuilder dataModelBuilder,
                             IDataModel dataModel,
                             IDRescorer rescorer,
                             int at,
                             double relevanceThreshold,
                             double evaluationPercentage) {
  //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
  //Preconditions.checkArgument(dataModel != null, "dataModel is null");
  //Preconditions.checkArgument(at >= 1, "at must be at least 1");
  //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
  //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");
  int numItems = dataModel.GetNumItems();
  IRunningAverage precision = new FullRunningAverage();
  IRunningAverage recall = new FullRunningAverage();
  IRunningAverage fallOut = new FullRunningAverage();
  IRunningAverage nDCG = new FullRunningAverage();
  int numUsersRecommendedFor = 0;
  int numUsersWithRecommendations = 0;
  var it = dataModel.GetUserIDs();
  while (it.MoveNext()) {
    long userID = it.Current;
    // Sample only the requested fraction of users.
    if (random.nextDouble() >= evaluationPercentage) {
      // Skipped
      continue;
    }
    var stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start();
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
    // List some most-preferred items that would count as (most) "relevant" results
    double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
    FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
    int numRelevantItems = relevantItemIDs.Count();
    if (numRelevantItems <= 0) {
      continue;
    }
    // Build a training model that excludes this user's relevant (held-out) items.
    FastByIDMap <IPreferenceArray> trainingUsers = new FastByIDMap <IPreferenceArray>(dataModel.GetNumUsers());
    var it2 = dataModel.GetUserIDs();
    while (it2.MoveNext()) {
      dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
    }
    IDataModel trainingModel = dataModelBuilder == null ?
        new GenericDataModel(trainingUsers) :
        dataModelBuilder.BuildDataModel(trainingUsers);
    try {
      trainingModel.GetPreferencesFromUser(userID);
    } catch (NoSuchUserException nsee) {
      continue; // Oops we excluded all prefs for the user -- just move on
    }
    int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
    if (size < 2 * at) {
      // Really not enough prefs to meaningfully evaluate this user
      continue;
    }
    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);
    // Count how many recommended items were actually in the held-out relevant set.
    int intersectionSize = 0;
    var recommendedItems = recommender.Recommend(userID, at, rescorer);
    foreach (IRecommendedItem recommendedItem in recommendedItems) {
      if (relevantItemIDs.Contains(recommendedItem.GetItemID())) {
        intersectionSize++;
      }
    }
    int numRecommendedItems = recommendedItems.Count;
    // Precision
    if (numRecommendedItems > 0) {
      precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
    }
    // Recall
    recall.AddDatum((double)intersectionSize / (double)numRelevantItems);
    // Fall-out
    if (numRelevantItems < size) {
      fallOut.AddDatum((double)(numRecommendedItems - intersectionSize) / (double)(numItems - numRelevantItems));
    }
    // nDCG
    // In computing, assume relevant IDs have relevance 1 and others 0
    double cumulativeGain = 0.0;
    double idealizedGain = 0.0;
    for (int i = 0; i < numRecommendedItems; i++) {
      IRecommendedItem item = recommendedItems[i];
      double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
      if (relevantItemIDs.Contains(item.GetItemID())) {
        cumulativeGain += discount;
      }
      // otherwise we're multiplying discount by relevance 0 so it doesn't do anything
      // Ideally results would be ordered with all relevant ones first, so this theoretical
      // ideal list starts with number of relevant items equal to the total number of relevant items
      if (i < numRelevantItems) {
        idealizedGain += discount;
      }
    }
    if (idealizedGain > 0.0) {
      nDCG.AddDatum(cumulativeGain / idealizedGain);
    }
    // Reach
    numUsersRecommendedFor++;
    if (numRecommendedItems > 0) {
      numUsersWithRecommendations++;
    }
    stopWatch.Stop();
    log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
    log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
        precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
        (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
  }
  return (new IRStatisticsImpl(
      precision.GetAverage(),
      recall.GetAverage(),
      fallOut.GetAverage(),
      nDCG.GetAverage(),
      (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
}