/// <summary>
/// Builds a three-user model (users 123, 456 and 789) and asks a
/// <c>SamplingCandidateItemsStrategy</c> constructed with 1 for each of its first three
/// arguments for candidates for user 123. Asserts that at most one candidate is returned
/// and that item 1, which user 123 already has a preference for, is not among them.
/// </summary>
public void testStrategy()
{
    // User 123: item 1 only. This array is also handed to the strategy below.
    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(
        new List<IPreference> { new GenericPreference(123L, 1L, 1.0f) });

    // User 456: items 1 and 2.
    List<IPreference> prefsOfUser456 = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f)
    };

    // User 789: items 1 and 3.
    List<IPreference> prefsOfUser789 = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f)
    };

    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    userData.Put(123L, prefArrayOfUser123);
    userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
    userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));
    IDataModel dataModel = new GenericDataModel(userData);

    int numUsers = dataModel.GetNumUsers();
    int numItems = dataModel.GetNumItems();
    ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, numUsers, numItems);

    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

    Assert.True(candidateItems.Count() <= 1);
    Assert.False(candidateItems.Contains(1L));
}
/// <summary>
/// Computes the ratio of the number of items both users have in common to the number of
/// items either user has (intersection size divided by union size).
/// </summary>
/// <param name="userID1">first user</param>
/// <param name="userID2">second user</param>
/// <returns>
/// <c>Double.NaN</c> when neither user has any item or when the users share no item;
/// <c>0.0</c> when exactly one user has no items; otherwise intersection / union.
/// </returns>
public double UserSimilarity(long userID1, long userID2)
{
    IDataModel model = getDataModel();
    FastIDSet firstItems = model.GetItemIDsFromUser(userID1);
    FastIDSet secondItems = model.GetItemIDsFromUser(userID2);

    int firstCount = firstItems.Count();
    int secondCount = secondItems.Count();

    if (firstCount == 0 || secondCount == 0)
    {
        // Both empty -> undefined; only one empty -> no overlap possible.
        bool bothEmpty = firstCount == 0 && secondCount == 0;
        return bothEmpty ? Double.NaN : 0.0;
    }

    // IntersectionSize is invoked on the larger set with the smaller one as argument.
    int common;
    if (firstCount < secondCount)
    {
        common = secondItems.IntersectionSize(firstItems);
    }
    else
    {
        common = firstItems.IntersectionSize(secondItems);
    }

    if (common == 0)
    {
        return Double.NaN;
    }

    int union = firstCount + secondCount - common;
    return (double)common / (double)union;
}
/// <summary>
/// Verifies that <c>AllUnknownItemsCandidateItemsStrategy</c> returns every item of the data
/// model except the ones the user already has a preference for. The mocked model exposes
/// items 1, 2 and 3; user 123 prefers item 2, so items 1 and 3 are expected.
/// </summary>
public void testStrategy()
{
    FastIDSet allItemIDs = new FastIDSet();
    allItemIDs.AddAll(new long[] { 1L, 2L, 3L });

    // The mock expects exactly these two calls; Verify() at the end checks them.
    var dataModelMock = new DynamicMock(typeof(IDataModel));
    dataModelMock.ExpectAndReturn("GetNumItems", 3);
    dataModelMock.ExpectAndReturn("GetItemIDs", allItemIDs.GetEnumerator());

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(
        new List<IPreference>() { new GenericPreference(123L, 2L, 1.0f) });

    ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

    FastIDSet candidateItems =
        strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)dataModelMock.MockInstance);

    Assert.AreEqual(2, candidateItems.Count());
    Assert.True(candidateItems.Contains(1L));
    Assert.True(candidateItems.Contains(3L));

    dataModelMock.Verify();
}
/// <summary>
/// After <c>Clear()</c>, a set that held one element reports a count of zero, reports itself
/// as empty, and no longer contains the element.
/// </summary>
public void testClear()
{
    FastIDSet ids = new FastIDSet();
    ids.Add(1);

    ids.Clear();

    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());
    Assert.False(ids.Contains(1));
}
/// <summary>
/// Looks up the item ID sets of both users, measures their sizes and the size of their
/// intersection, and hands those three numbers to <c>doSimilarity</c>.
/// </summary>
/// <param name="userID1">first user</param>
/// <param name="userID2">second user</param>
/// <returns>whatever <c>doSimilarity</c> returns for the two set sizes and the overlap</returns>
public double UserSimilarity(long userID1, long userID2)
{
    IDataModel model = getDataModel();
    FastIDSet itemsOfFirst = model.GetItemIDsFromUser(userID1);
    FastIDSet itemsOfSecond = model.GetItemIDsFromUser(userID2);

    int firstCount = itemsOfFirst.Count();
    int secondCount = itemsOfSecond.Count();

    // IntersectionSize is invoked on the larger set with the smaller one as argument.
    int overlap;
    if (firstCount < secondCount)
    {
        overlap = itemsOfSecond.IntersectionSize(itemsOfFirst);
    }
    else
    {
        overlap = itemsOfFirst.IntersectionSize(itemsOfSecond);
    }

    return doSimilarity(firstCount, secondCount, overlap);
}
/// <summary>
/// Runs the base preparation, then allocates and initialises the <c>p</c> matrix (one row
/// per user) and the <c>y</c> matrix (one row per item), and finally builds
/// <c>itemsByUser</c>, mapping each user index to the list of item indexes of that user.
/// In every row the first <c>FEATURE_OFFSET</c> entries are set to 0 and the remaining ones
/// to a Gaussian sample scaled by <c>randomNoise</c>.
/// </summary>
protected override void prepareTraining()
{
    base.prepareTraining();
    var rng = RandomUtils.getRandom();

    // One row per user. All rows of p are drawn before any row of y, so the sequence of
    // random draws is the same as filling p completely and then y.
    int userCount = dataModel.GetNumUsers();
    p = new double[userCount][];
    for (int row = 0; row < userCount; row++)
    {
        double[] values = new double[numFeatures];
        int f = 0;
        while (f < FEATURE_OFFSET)
        {
            values[f] = 0;
            f++;
        }
        while (f < numFeatures)
        {
            values[f] = rng.nextGaussian() * randomNoise;
            f++;
        }
        p[row] = values;
    }

    // One row per item, initialised the same way.
    int itemCount = dataModel.GetNumItems();
    y = new double[itemCount][];
    for (int row = 0; row < itemCount; row++)
    {
        double[] values = new double[numFeatures];
        int f = 0;
        while (f < FEATURE_OFFSET)
        {
            values[f] = 0;
            f++;
        }
        while (f < numFeatures)
        {
            values[f] = rng.nextGaussian() * randomNoise;
            f++;
        }
        y[row] = values;
    }

    // Cache the internal item indexes per user; they are needed several times later.
    itemsByUser = new Dictionary<int, List<int>>();
    var users = dataModel.GetUserIDs();
    while (users.MoveNext())
    {
        long currentUser = users.Current;
        int currentUserIdx = userIndex(currentUser);
        FastIDSet itemsOfUser = dataModel.GetItemIDsFromUser(currentUser);

        List<int> indexes = new List<int>(itemsOfUser.Count());
        itemsByUser[currentUserIdx] = indexes;
        foreach (long item in itemsOfUser)
        {
            indexes.Add(itemIndex(item));
        }
    }
}
/// <summary>
/// Collects up to <paramref name="at"/> item IDs of the given user whose preference value is
/// at least <paramref name="relevanceThreshold"/>. The user's preference array is reordered
/// with <c>SortByValueReversed()</c> before it is scanned from the start.
/// </summary>
/// <param name="userID">user whose preferences are inspected</param>
/// <param name="at">maximum number of item IDs to collect; also the initial set size</param>
/// <param name="relevanceThreshold">minimum preference value for an item to be collected</param>
/// <param name="dataModel">model the preferences are read from</param>
/// <returns>the collected item IDs (possibly empty)</returns>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel)
{
    IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
    FastIDSet picked = new FastIDSet(at);
    userPrefs.SortByValueReversed();

    int index = 0;
    while (index < userPrefs.Length() && picked.Count() < at)
    {
        bool relevant = userPrefs.GetValue(index) >= relevanceThreshold;
        if (relevant)
        {
            picked.Add(userPrefs.GetItemID(index));
        }
        index++;
    }

    return picked;
}
/// <summary>
/// Adds item IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>.
/// When the source holds more than <c>maxItemsPerUser</c> IDs, only the IDs produced by a
/// <c>SamplinglongPrimitiveIterator</c> created with rate maxItemsPerUser / size are added;
/// otherwise all of them are added.
/// </summary>
/// <param name="possibleItemIDs">set that receives the IDs</param>
/// <param name="itemIDs">set the IDs are taken from</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
{
    // Small enough: take everything.
    if (!(itemIDs.Count() > maxItemsPerUser))
    {
        possibleItemIDs.AddAll(itemIDs);
        return;
    }

    var source = itemIDs.GetEnumerator();
    double samplingRate = (double)maxItemsPerUser / itemIDs.Count();
    var sampled = new SamplinglongPrimitiveIterator(source, samplingRate);
    while (sampled.MoveNext())
    {
        possibleItemIDs.Add(sampled.Current);
    }
}
/// <summary>
/// Returns at most <paramref name="at"/> item IDs of the user whose preference value reaches
/// <paramref name="relevanceThreshold"/>. The preference array obtained from the model is
/// first reordered via <c>SortByValueReversed()</c> and then walked from index 0.
/// </summary>
/// <param name="userID">user whose preferences are inspected</param>
/// <param name="at">upper bound on the number of returned IDs; also the initial set size</param>
/// <param name="relevanceThreshold">minimum preference value for inclusion</param>
/// <param name="dataModel">model the preferences are read from</param>
/// <returns>the set of selected item IDs (possibly empty)</returns>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel)
{
    IPreferenceArray ranked = dataModel.GetPreferencesFromUser(userID);
    FastIDSet result = new FastIDSet(at);
    ranked.SortByValueReversed();

    for (int pos = 0; pos < ranked.Length(); pos++)
    {
        // Stop as soon as the requested number of IDs has been gathered.
        if (!(result.Count() < at))
        {
            break;
        }

        if (ranked.GetValue(pos) >= relevanceThreshold)
        {
            long itemID = ranked.GetItemID(pos);
            result.Add(itemID);
        }
    }

    return result;
}
/// <summary>
/// Counts the users that appear in the user sets of both items.
/// </summary>
/// <param name="itemID1">first item</param>
/// <param name="itemID2">second item</param>
/// <returns>
/// the size of the intersection of the two user sets, or 0 when either item has no entry
/// in <c>preferenceForItems</c>
/// </returns>
public override int GetNumUsersWithPreferenceFor(long itemID1, long itemID2)
{
    FastIDSet usersOfFirst = preferenceForItems.Get(itemID1);
    if (usersOfFirst == null)
    {
        return 0;
    }

    FastIDSet usersOfSecond = preferenceForItems.Get(itemID2);
    if (usersOfSecond == null)
    {
        return 0;
    }

    // IntersectionSize is invoked on the larger set with the smaller one as argument.
    if (usersOfFirst.Count() < usersOfSecond.Count())
    {
        return usersOfSecond.IntersectionSize(usersOfFirst);
    }

    return usersOfFirst.IntersectionSize(usersOfSecond);
}
/// <summary>
/// Builds a <c>BooleanItemPreferenceArray</c> holding one entry per user recorded for the
/// given item in <c>preferenceForItems</c>.
/// </summary>
/// <param name="itemID">item whose users are listed</param>
/// <returns>an array whose entries carry each user ID together with <paramref name="itemID"/></returns>
/// <exception cref="NoSuchItemException">if <c>preferenceForItems</c> has no entry for the item</exception>
public override IPreferenceArray GetPreferencesForItem(long itemID)
{
    FastIDSet users = preferenceForItems.Get(itemID);
    if (users == null)
    {
        throw new NoSuchItemException(itemID);
    }

    IPreferenceArray result = new BooleanItemPreferenceArray(users.Count());
    var cursor = users.GetEnumerator();
    for (int slot = 0; cursor.MoveNext(); slot++)
    {
        result.SetUserID(slot, cursor.Current);
        result.SetItemID(slot, itemID);
    }

    return result;
}
/// <summary>
/// Builds a <c>BooleanUserPreferenceArray</c> holding one entry per item recorded for the
/// given user in <c>preferenceFromUsers</c>.
/// </summary>
/// <param name="userID">user whose items are listed</param>
/// <returns>an array whose entries carry <paramref name="userID"/> together with each item ID</returns>
/// <exception cref="NoSuchUserException">if there is no such user</exception>
public override IPreferenceArray GetPreferencesFromUser(long userID)
{
    FastIDSet items = preferenceFromUsers.Get(userID);
    if (items == null)
    {
        throw new NoSuchUserException(userID);
    }

    IPreferenceArray result = new BooleanUserPreferenceArray(items.Count());
    var cursor = items.GetEnumerator();
    for (int slot = 0; cursor.MoveNext(); slot++)
    {
        result.SetUserID(slot, userID);
        result.SetItemID(slot, cursor.Current);
    }

    return result;
}
/// <summary>
/// Verifies <c>PreferredItemsNeighborhoodCandidateItemsStrategy</c> against a mocked data
/// model: users 123 and 456 both prefer item 1 and user 456 additionally has item 2, so the
/// only candidate expected for user 123 is item 2.
/// </summary>
public void testStrategy()
{
    FastIDSet itemsOf123 = new FastIDSet();
    itemsOf123.Add(1L);

    FastIDSet itemsOf456 = new FastIDSet();
    itemsOf456.Add(1L);
    itemsOf456.Add(2L);

    // Preferences recorded for item 1: one from each user.
    List<IPreference> item1Prefs = new List<IPreference>
    {
        new GenericPreference(123L, 1L, 1.0f),
        new GenericPreference(456L, 1L, 1.0f)
    };
    IPreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(item1Prefs);

    // The mock expects these three calls in this order; Verify() at the end checks them.
    var dataModelMock = new DynamicMock(typeof(IDataModel));
    dataModelMock.ExpectAndReturn("GetPreferencesForItem", preferencesForItem1, 1L);
    dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOf123, 123L);
    dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOf456, 456L);

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(
        new List<IPreference>() { new GenericPreference(123L, 1L, 1.0f) });

    ICandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();
    IDataModel mockedModel = (IDataModel)dataModelMock.MockInstance;

    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, mockedModel);

    Assert.AreEqual(1, candidateItems.Count());
    Assert.True(candidateItems.Contains(2L));

    dataModelMock.Verify();
}
/// <summary>
/// Derives a similarity from the value returned by <c>LogLikelihood.logLikelihoodRatio</c>
/// for four counts: items both users have, items only the second user has, items only the
/// first user has, and the remaining items of the model.
/// </summary>
/// <param name="userID1">first user</param>
/// <param name="userID2">second user</param>
/// <returns>
/// <c>Double.NaN</c> when the users share no item; otherwise
/// <c>1.0 - 1.0 / (1.0 + logLikelihood)</c>.
/// </returns>
public double UserSimilarity(long userID1, long userID2)
{
    IDataModel model = getDataModel();
    FastIDSet itemsOfFirst = model.GetItemIDsFromUser(userID1);
    FastIDSet itemsOfSecond = model.GetItemIDsFromUser(userID2);

    long firstCount = itemsOfFirst.Count();
    long secondCount = itemsOfSecond.Count();

    // IntersectionSize is invoked on the larger set with the smaller one as argument.
    long both;
    if (firstCount < secondCount)
    {
        both = itemsOfSecond.IntersectionSize(itemsOfFirst);
    }
    else
    {
        both = itemsOfFirst.IntersectionSize(itemsOfSecond);
    }

    if (both == 0)
    {
        return Double.NaN;
    }

    long totalItems = model.GetNumItems();
    long onlySecond = secondCount - both;
    long onlyFirst = firstCount - both;
    long neither = totalItems - firstCount - secondCount + both;

    double llr = LogLikelihood.logLikelihoodRatio(both, onlySecond, onlyFirst, neither);
    return 1.0 - 1.0 / (1.0 + llr);
}
/// <summary>
/// Tracks <c>Count()</c> and <c>IsEmpty()</c> through the life of a set: freshly created,
/// after adding one element, and after removing that element again.
/// </summary>
public void testSizeEmpty()
{
    FastIDSet ids = new FastIDSet();

    // Fresh set.
    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());

    // One element present.
    ids.Add(1);
    Assert.AreEqual(1, ids.Count());
    Assert.False(ids.IsEmpty());

    // Element removed again.
    ids.Remove(1);
    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());
}
/// <summary>
/// Reports how many users are recorded for the given item in <c>preferenceForItems</c>.
/// </summary>
/// <param name="itemID">item to look up</param>
/// <returns>the size of the item's user set, or 0 when the item has no entry</returns>
public override int GetNumUsersWithPreferenceFor(long itemID)
{
    FastIDSet users = preferenceForItems.Get(itemID);
    if (users == null)
    {
        return 0;
    }

    return users.Count();
}
/// <summary>
/// Evaluates a recommender with information-retrieval metrics. For each sampled user the
/// method picks that user's relevant items via <c>dataSplitter</c>, builds a training model
/// from the data <c>dataSplitter.ProcessOtherUser</c> puts into the training map, asks a
/// freshly built recommender for <paramref name="at"/> recommendations and accumulates
/// precision, recall, fall-out, nDCG and reach.
/// </summary>
/// <param name="recommenderBuilder">builds the recommender for each training model; must not be null</param>
/// <param name="dataModelBuilder">builds the training model; when null a <c>GenericDataModel</c> is used</param>
/// <param name="dataModel">full data set to evaluate against; must not be null</param>
/// <param name="rescorer">passed through unchanged to <c>Recommend</c></param>
/// <param name="at">number of recommendations requested per user; must be at least 1</param>
/// <param name="relevanceThreshold">
/// minimum preference value for an item to count as relevant; when NaN, the threshold is
/// obtained per user from <c>computeThreshold</c>
/// </param>
/// <param name="evaluationPercentage">
/// a user is evaluated only when <c>random.nextDouble()</c> is below this value;
/// must satisfy 0.0 &lt; evaluationPercentage &lt;= 1.0
/// </param>
/// <returns>
/// the averaged statistics. Reach is computed as users with at least one recommendation
/// divided by users evaluated, so it is NaN (0.0 / 0.0) when no user was evaluated.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// if <paramref name="recommenderBuilder"/> or <paramref name="dataModel"/> is null
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// if <paramref name="at"/> is less than 1 or <paramref name="evaluationPercentage"/> is
/// outside (0.0, 1.0]
/// </exception>
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder, IDataModelBuilder dataModelBuilder, IDataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold, double evaluationPercentage)
{
    // Fail fast on invalid arguments instead of running into a NullReferenceException later
    // or processing out-of-range values.
    if (recommenderBuilder == null)
    {
        throw new System.ArgumentNullException("recommenderBuilder", "recommenderBuilder is null");
    }
    if (dataModel == null)
    {
        throw new System.ArgumentNullException("dataModel", "dataModel is null");
    }
    if (at < 1)
    {
        throw new System.ArgumentOutOfRangeException("at", "at must be at least 1");
    }
    // Written as a negated range check so that NaN is rejected as well.
    if (!(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0))
    {
        throw new System.ArgumentOutOfRangeException("evaluationPercentage",
            "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");
    }

    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    var it = dataModel.GetUserIDs();
    while (it.MoveNext())
    {
        long userID = it.Current;

        if (random.nextDouble() >= evaluationPercentage)
        {
            // Skipped
            continue;
        }

        var stopWatch = new System.Diagnostics.Stopwatch();
        stopWatch.Start();

        IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

        // List some most-preferred items that would count as (most) "relevant" results
        double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
        FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);

        int numRelevantItems = relevantItemIDs.Count();
        if (numRelevantItems <= 0)
        {
            continue;
        }

        // Build the training data by letting the splitter process every user of the model.
        FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
        var it2 = dataModel.GetUserIDs();
        while (it2.MoveNext())
        {
            dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
        }

        IDataModel trainingModel = dataModelBuilder == null
            ? new GenericDataModel(trainingUsers)
            : dataModelBuilder.BuildDataModel(trainingUsers);
        try
        {
            trainingModel.GetPreferencesFromUser(userID);
        }
        catch (NoSuchUserException)
        {
            continue; // Oops we excluded all prefs for the user -- just move on
        }

        int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
        if (size < 2 * at)
        {
            // Really not enough prefs to meaningfully evaluate this user
            continue;
        }

        IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

        int intersectionSize = 0;
        var recommendedItems = recommender.Recommend(userID, at, rescorer);
        foreach (IRecommendedItem recommendedItem in recommendedItems)
        {
            if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
            {
                intersectionSize++;
            }
        }

        int numRecommendedItems = recommendedItems.Count;

        // Precision
        if (numRecommendedItems > 0)
        {
            precision.AddDatum((double)intersectionSize / (double)numRecommendedItems);
        }

        // Recall
        recall.AddDatum((double)intersectionSize / (double)numRelevantItems);

        // Fall-out
        if (numRelevantItems < size)
        {
            fallOut.AddDatum((double)(numRecommendedItems - intersectionSize) / (double)(numItems - numRelevantItems));
        }

        // nDCG
        // In computing, assume relevant IDs have relevance 1 and others 0
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int i = 0; i < numRecommendedItems; i++)
        {
            IRecommendedItem item = recommendedItems[i];
            double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
            if (relevantItemIDs.Contains(item.GetItemID()))
            {
                cumulativeGain += discount;
            }
            // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

            // Ideally results would be ordered with all relevant ones first, so this theoretical
            // ideal list starts with number of relevant items equal to the total number of relevant items
            if (i < numRelevantItems)
            {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0)
        {
            nDCG.AddDatum(cumulativeGain / idealizedGain);
        }

        // Reach
        numUsersRecommendedFor++;
        if (numRecommendedItems > 0)
        {
            numUsersWithRecommendations++;
        }

        stopWatch.Stop();

        log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
        log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
            precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
            (double)numUsersWithRecommendations / (double)numUsersRecommendedFor);
    }

    return(new IRStatisticsImpl(
               precision.GetAverage(),
               recall.GetAverage(),
               fallOut.GetAverage(),
               nDCG.GetAverage(),
               (double)numUsersWithRecommendations / (double)numUsersRecommendedFor));
}
/// <summary>
/// Copies item IDs from <paramref name="itemIDs"/> into <paramref name="possibleItemIDs"/>.
/// If the source has more than <c>maxItemsPerUser</c> entries, only those produced by a
/// <c>SamplinglongPrimitiveIterator</c> (rate = maxItemsPerUser / source size) are copied;
/// otherwise the whole source is copied.
/// </summary>
/// <param name="possibleItemIDs">destination set</param>
/// <param name="itemIDs">source set</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
{
    bool tooMany = itemIDs.Count() > maxItemsPerUser;
    if (!tooMany)
    {
        possibleItemIDs.AddAll(itemIDs);
        return;
    }

    var sampler = new SamplinglongPrimitiveIterator(
        itemIDs.GetEnumerator(),
        (double)maxItemsPerUser / itemIDs.Count());

    for (; sampler.MoveNext(); )
    {
        long picked = sampler.Current;
        possibleItemIDs.Add(picked);
    }
}
/// <summary>
/// Runs one million random operations with keys below 100 against both a
/// <c>FastIDSet</c> and a <c>HashSet&lt;int&gt;</c> and asserts that they always agree.
/// Per iteration a random double picks the operation: below 0.4 a membership check, below
/// 0.7 an add, otherwise a remove. After every add or remove the sizes and emptiness are
/// compared as well.
/// </summary>
public void testVersusHashSet()
{
    FastIDSet actual = new FastIDSet(1);
    var expected = new HashSet<int>();
    var rng = RandomUtils.getRandom();

    int remaining = 1000000;
    while (remaining-- > 0)
    {
        // Draw order matters for reproducibility: operation selector first, then the key.
        double selector = rng.nextDouble();
        var key = rng.nextInt(100);

        if (selector < 0.4)
        {
            Assert.AreEqual(expected.Contains(key), actual.Contains(key));
            continue;
        }

        if (selector < 0.7)
        {
            var addedToExpected = expected.Add(key);
            var addedToActual = actual.Add(key);
            Assert.AreEqual(addedToExpected, addedToActual);
        }
        else
        {
            var removedFromExpected = expected.Remove(key);
            var removedFromActual = actual.Remove(key);
            Assert.AreEqual(removedFromExpected, removedFromActual);
        }

        Assert.AreEqual(expected.Count, actual.Count());
        Assert.AreEqual(expected.Count == 0, actual.IsEmpty());
    }
}