/// <summary>
/// Verifies that an ID is reported as absent before it is added to a
/// <c>FastIDSet</c> and as present afterwards.
/// </summary>
public void testContainsAndAdd()
{
    FastIDSet ids = new FastIDSet();

    // Nothing has been added yet, so the lookup must fail.
    Assert.False(ids.Contains(1));

    ids.Add(1);

    Assert.True(ids.Contains(1));
}
/// <summary>
/// Exercises <c>AllUnknownItemsCandidateItemsStrategy</c> against a mocked data model
/// that exposes items 1, 2 and 3. User 123 already has a preference for item 2,
/// and the test expects exactly items 1 and 3 to come back as candidates.
/// </summary>
public void testStrategy()
{
    FastIDSet allItemIDs = new FastIDSet();
    allItemIDs.AddAll(new long[] { 1L, 2L, 3L });

    // NOTE(review): this set is built but never handed to the mock or the strategy.
    FastIDSet preferredItemIDs = new FastIDSet(1);
    preferredItemIDs.Add(2L);

    var dataModelMock = new DynamicMock(typeof(IDataModel));
    dataModelMock.ExpectAndReturn("GetNumItems", 3);
    dataModelMock.ExpectAndReturn("GetItemIDs", allItemIDs.GetEnumerator());

    List<IPreference> prefsOfUser123 = new List<IPreference>()
    {
        new GenericPreference(123L, 2L, 1.0f)
    };
    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

    ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();
    IDataModel dataModel = (IDataModel)dataModelMock.MockInstance;

    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

    Assert.AreEqual(2, candidateItems.Count());
    Assert.True(candidateItems.Contains(1L));
    Assert.True(candidateItems.Contains(3L));

    // Confirms the expectations registered on the mock were met.
    dataModelMock.Verify();
}
/// <summary>
/// Adds two IDs to a set created via <c>new FastIDSet(1)</c> and checks that
/// both remain retrievable afterwards.
/// </summary>
public void testGrow()
{
    FastIDSet ids = new FastIDSet(1);

    ids.Add(1);
    ids.Add(2);

    Assert.True(ids.Contains(1));
    Assert.True(ids.Contains(2));
}
/// <summary>
/// Exercises <c>SamplingCandidateItemsStrategy</c> on a three-user in-memory data model.
/// User 123 only knows item 1; the test expects at most one candidate item and
/// expects item 1 itself never to be among the candidates.
/// </summary>
public void testStrategy()
{
    List<IPreference> prefsOfUser123 = new List<IPreference>
    {
        new GenericPreference(123L, 1L, 1.0f)
    };
    List<IPreference> prefsOfUser456 = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f)
    };
    List<IPreference> prefsOfUser789 = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f)
    };

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    userData.Put(123L, prefArrayOfUser123);
    userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
    userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));

    IDataModel dataModel = new GenericDataModel(userData);

    ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(
        1, 1, 1, dataModel.GetNumUsers(), dataModel.GetNumItems());

    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

    Assert.True(candidateItems.Count() <= 1);
    Assert.False(candidateItems.Contains(1L));
}
/// <summary>
/// Copies the preferences of <paramref name="otherUserID"/> into
/// <paramref name="trainingUsers"/>. When that user is the one being evaluated,
/// the preferences for the "relevant" (held-out) items are left out first.
/// </summary>
/// <param name="userID">The user currently being evaluated for precision/recall.</param>
/// <param name="relevantItemIDs">Item IDs held out as test data for <paramref name="userID"/>.</param>
/// <param name="trainingUsers">Map that receives the training preferences, keyed by user ID.</param>
/// <param name="otherUserID">The user whose preferences are being processed.</param>
/// <param name="dataModel">Source of the user's preferences.</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray otherPrefs = dataModel.GetPreferencesFromUser(otherUserID);

    if (userID != otherUserID)
    {
        // Any other user contributes all of their preferences unchanged.
        trainingUsers.Put(otherUserID, otherPrefs);
        return;
    }

    // This is the user under evaluation: keep only the non-relevant preferences.
    List<IPreference> retained = new List<IPreference>(otherPrefs.Length());
    foreach (IPreference pref in otherPrefs)
    {
        if (relevantItemIDs.Contains(pref.GetItemID()))
        {
            continue;
        }
        retained.Add(pref);
    }

    // If nothing is left, the user is not added to the training map at all.
    if (retained.Count > 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(retained));
    }
}
/// <summary>
/// Verifies that <c>Clear()</c> leaves a <c>FastIDSet</c> with a count of zero,
/// reporting itself as empty and no longer containing the previously added ID.
/// </summary>
public void testClear()
{
    FastIDSet ids = new FastIDSet();
    ids.Add(1);

    ids.Clear();

    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());
    Assert.False(ids.Contains(1));
}
/// <summary>
/// Looks up whether <paramref name="userID"/> has an association with <paramref name="itemID"/>.
/// </summary>
/// <param name="userID">User whose item set is consulted.</param>
/// <param name="itemID">Item to look for in that set.</param>
/// <returns><c>1.0f</c> when the user's item set contains the item; otherwise <c>null</c>.</returns>
/// <exception cref="NoSuchUserException">No item set is stored for <paramref name="userID"/>.</exception>
public override float? GetPreferenceValue(long userID, long itemID)
{
    FastIDSet itemsOfUser = preferenceFromUsers.Get(userID);
    if (itemsOfUser == null)
    {
        throw new NoSuchUserException(userID);
    }

    return itemsOfUser.Contains(itemID) ? 1.0f : (float?)null;
}
/// <summary>
/// Collects, in enumeration order, the IDs of the recommended items that are also
/// present in <paramref name="commonSet"/>, stopping once <paramref name="max"/> IDs
/// have been gathered.
/// </summary>
/// <param name="commonSet">Set of item IDs to match against.</param>
/// <param name="recs">Recommended items to scan.</param>
/// <param name="max">Length of the returned array and upper bound on collected IDs.</param>
/// <returns>
/// An array of length <paramref name="max"/>. If fewer than <paramref name="max"/>
/// matches are found, the remaining trailing slots keep the default value 0.
/// </returns>
private static long[] getCommonItems(FastIDSet commonSet, IEnumerable<IRecommendedItem> recs, int max)
{
    long[] commonItems = new long[max];

    // With no capacity there is nothing to collect. Without this guard the first
    // match would be written to an empty array before the bounds check runs.
    if (max == 0)
    {
        return commonItems;
    }

    int index = 0;
    foreach (IRecommendedItem rec in recs)
    {
        long item = rec.GetItemID();
        if (commonSet.Contains(item))
        {
            commonItems[index++] = item;
        }
        if (index == max)
        {
            break;
        }
    }
    return commonItems;
}
/// <summary>
/// Collects, in array order, the item IDs from <paramref name="prefs1"/> that are also
/// present in <paramref name="commonSet"/>, stopping once <paramref name="max"/> IDs
/// have been gathered.
/// </summary>
/// <param name="commonSet">Set of item IDs to match against.</param>
/// <param name="prefs1">Preference array whose item IDs are scanned.</param>
/// <param name="max">Length of the returned array and upper bound on collected IDs.</param>
/// <returns>
/// An array of length <paramref name="max"/>. If fewer than <paramref name="max"/>
/// matches are found, the remaining trailing slots keep the default value 0.
/// </returns>
private static long[] getCommonItems(FastIDSet commonSet, IPreferenceArray prefs1, int max)
{
    long[] commonItems = new long[max];
    int index = 0;

    // The capacity test is part of the loop condition so that max == 0 never
    // attempts a write into the (empty) result array.
    for (int i = 0; index < max && i < prefs1.Length(); i++)
    {
        long item = prefs1.GetItemID(i);
        if (commonSet.Contains(item))
        {
            commonItems[index++] = item;
        }
    }
    return commonItems;
}
/// <summary>
/// Intersects <paramref name="commonSet"/> with <paramref name="otherSet"/> in place over
/// the ID range 0..<paramref name="maxItemID"/> (inclusive) and counts the survivors.
/// </summary>
/// <remarks>
/// Per the original author's note: this exists because FastIDSet offers 'retainAll' as
/// the masking operation but gives no usable count of the remaining items; size() is
/// supposed to do this but does not work.
/// IDs outside 0..<paramref name="maxItemID"/> are neither counted nor removed.
/// </remarks>
/// <param name="commonSet">Set that is reduced to the IDs also found in <paramref name="otherSet"/>.</param>
/// <param name="otherSet">Set to intersect with; it is only read.</param>
/// <param name="maxItemID">Largest item ID to examine.</param>
/// <returns>The number of IDs in the examined range that are present in both sets.</returns>
private static int mask(FastIDSet commonSet, FastIDSet otherSet, long maxItemID)
{
    int count = 0;

    // The counter is a long to match maxItemID: an int counter would overflow
    // before reaching bounds above int.MaxValue and the loop would not terminate.
    for (long id = 0; id <= maxItemID; id++)
    {
        if (commonSet.Contains(id))
        {
            if (otherSet.Contains(id))
            {
                count++;
            }
            else
            {
                commonSet.Remove(id);
            }
        }
    }
    return count;
}
/// <summary>
/// Exercises <c>PreferredItemsNeighborhoodCandidateItemsStrategy</c> against a mocked
/// data model. Users 123 and 456 both prefer item 1 and user 456 additionally has
/// item 2; the test expects item 2 to be the single candidate for user 123.
/// </summary>
public void testStrategy()
{
    FastIDSet itemIDsFromUser123 = new FastIDSet();
    itemIDsFromUser123.Add(1L);

    FastIDSet itemIDsFromUser456 = new FastIDSet();
    itemIDsFromUser456.Add(1L);
    itemIDsFromUser456.Add(2L);

    List<IPreference> prefsForItem1 = new List<IPreference>
    {
        new GenericPreference(123L, 1L, 1.0f),
        new GenericPreference(456L, 1L, 1.0f)
    };
    IPreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(prefsForItem1);

    var dataModelMock = new DynamicMock(typeof(IDataModel));
    dataModelMock.ExpectAndReturn("GetPreferencesForItem", preferencesForItem1, 1L);
    dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemIDsFromUser123, 123L);
    dataModelMock.ExpectAndReturn("GetItemIDsFromUser", itemIDsFromUser456, 456L);

    List<IPreference> prefsOfUser123 = new List<IPreference>()
    {
        new GenericPreference(123L, 1L, 1.0f)
    };
    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);

    ICandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();
    IDataModel dataModel = (IDataModel)dataModelMock.MockInstance;

    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);

    Assert.AreEqual(1, candidateItems.Count());
    Assert.True(candidateItems.Contains(2L));

    // Confirms the expectations registered on the mock were met.
    dataModelMock.Verify();
}
/// <summary>
/// Copies the preferences of <paramref name="otherUserID"/> into
/// <paramref name="trainingUsers"/>. When that user is the one being evaluated,
/// the preferences for the "relevant" (held-out) items are left out first.
/// </summary>
/// <param name="userID">The user currently being evaluated for precision/recall.</param>
/// <param name="relevantItemIDs">Item IDs held out as test data for <paramref name="userID"/>.</param>
/// <param name="trainingUsers">Map that receives the training preferences, keyed by user ID.</param>
/// <param name="otherUserID">The user whose preferences are being processed.</param>
/// <param name="dataModel">Source of the user's preferences.</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray sourcePrefs = dataModel.GetPreferencesFromUser(otherUserID);
    bool isEvaluatedUser = userID == otherUserID;

    if (!isEvaluatedUser)
    {
        // Any other user contributes all of their preferences unchanged.
        trainingUsers.Put(otherUserID, sourcePrefs);
        return;
    }

    // The evaluated user keeps only preferences for items outside the relevant set.
    List<IPreference> trainingPrefs = new List<IPreference>(sourcePrefs.Length());
    foreach (IPreference pref in sourcePrefs)
    {
        bool heldOut = relevantItemIDs.Contains(pref.GetItemID());
        if (!heldOut)
        {
            trainingPrefs.Add(pref);
        }
    }

    // If nothing is left, the user is not added to the training map at all.
    if (trainingPrefs.Count > 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(trainingPrefs));
    }
}
/// <summary>
/// Intersects <paramref name="commonSet"/> with <paramref name="otherSet"/> in place over
/// the ID range 0..<paramref name="maxItemID"/> (inclusive) and counts the survivors.
/// </summary>
/// <remarks>
/// Per the original author's note: this exists because FastIDSet offers 'retainAll' as
/// the masking operation but gives no usable count of the remaining items; size() is
/// supposed to do this but does not work.
/// IDs outside 0..<paramref name="maxItemID"/> are neither counted nor removed.
/// </remarks>
/// <param name="commonSet">Set that is reduced to the IDs also found in <paramref name="otherSet"/>.</param>
/// <param name="otherSet">Set to intersect with; it is only read.</param>
/// <param name="maxItemID">Largest item ID to examine.</param>
/// <returns>The number of IDs in the examined range that are present in both sets.</returns>
private static int mask(FastIDSet commonSet, FastIDSet otherSet, long maxItemID)
{
    int count = 0;

    // The counter is a long to match maxItemID: an int counter would overflow
    // before reaching bounds above int.MaxValue and the loop would not terminate.
    for (long id = 0; id <= maxItemID; id++)
    {
        if (!commonSet.Contains(id))
        {
            continue;
        }

        if (otherSet.Contains(id))
        {
            count++;
        }
        else
        {
            commonSet.Remove(id);
        }
    }
    return count;
}
/// <summary>
/// Collects, in enumeration order, the IDs of the recommended items that are also
/// present in <paramref name="commonSet"/>, stopping once <paramref name="max"/> IDs
/// have been gathered.
/// </summary>
/// <param name="commonSet">Set of item IDs to match against.</param>
/// <param name="recs">Recommended items to scan.</param>
/// <param name="max">Length of the returned array and upper bound on collected IDs.</param>
/// <returns>
/// An array of length <paramref name="max"/>. If fewer than <paramref name="max"/>
/// matches are found, the remaining trailing slots keep the default value 0.
/// </returns>
private static long[] getCommonItems(FastIDSet commonSet, IEnumerable<IRecommendedItem> recs, int max)
{
    long[] commonItems = new long[max];

    // With no capacity there is nothing to collect. Without this guard the first
    // match would be written to an empty array before the bounds check runs.
    if (max == 0)
    {
        return commonItems;
    }

    int index = 0;
    foreach (IRecommendedItem rec in recs)
    {
        long item = rec.GetItemID();
        if (commonSet.Contains(item))
        {
            commonItems[index++] = item;
        }
        if (index == max)
        {
            break;
        }
    }
    return commonItems;
}
/// <summary>
/// Collects, in array order, the item IDs from <paramref name="prefs1"/> that are also
/// present in <paramref name="commonSet"/>, stopping once <paramref name="max"/> IDs
/// have been gathered.
/// </summary>
/// <param name="commonSet">Set of item IDs to match against.</param>
/// <param name="prefs1">Preference array whose item IDs are scanned.</param>
/// <param name="max">Length of the returned array and upper bound on collected IDs.</param>
/// <returns>
/// An array of length <paramref name="max"/>. If fewer than <paramref name="max"/>
/// matches are found, the remaining trailing slots keep the default value 0.
/// </returns>
private static long[] getCommonItems(FastIDSet commonSet, IPreferenceArray prefs1, int max)
{
    long[] commonItems = new long[max];
    int index = 0;

    // The capacity test is part of the loop condition so that max == 0 never
    // attempts a write into the (empty) result array.
    for (int i = 0; index < max && i < prefs1.Length(); i++)
    {
        long item = prefs1.GetItemID(i);
        if (commonSet.Contains(item))
        {
            commonItems[index++] = item;
        }
    }
    return commonItems;
}
/// <summary>
/// Runs one million random operations (lookup, add, remove) with keys drawn via
/// <c>nextInt(100)</c> against both a <c>FastIDSet</c> and a <c>HashSet&lt;int&gt;</c>,
/// asserting after each operation that the two agree.
/// </summary>
public void testVersusHashSet()
{
    FastIDSet actual = new FastIDSet(1);
    var expected = new HashSet<int>();
    var r = RandomUtils.getRandom();

    for (int round = 0; round < 1000000; round++)
    {
        // Draw order matters for reproducibility: operation selector first, then key.
        double d = r.nextDouble();
        var key = r.nextInt(100);

        if (d < 0.4)
        {
            // Pure lookup: nothing changes, so size checks are skipped.
            Assert.AreEqual(expected.Contains(key), actual.Contains(key));
            continue;
        }

        if (d < 0.7)
        {
            Assert.AreEqual(expected.Add(key), actual.Add(key));
        }
        else
        {
            Assert.AreEqual(expected.Remove(key), actual.Remove(key));
        }

        // After a mutating operation both sets must report the same size and emptiness.
        Assert.AreEqual(expected.Count, actual.Count());
        Assert.AreEqual(expected.Count == 0, actual.IsEmpty());
    }
}
/// <summary>
/// Verifies that adding <c>long.MinValue</c> or <c>long.MaxValue</c> to a
/// <c>FastIDSet</c> throws an <see cref="ArgumentException"/> and that neither
/// value is reported as contained afterwards.
/// </summary>
public void testReservedValues()
{
    FastIDSet ids = new FastIDSet();

    try
    {
        ids.Add(long.MinValue);
        Assert.Fail("Should have thrown IllegalArgumentException");
    }
    catch (ArgumentException)
    {
        // expected: the value is rejected
    }
    Assert.False(ids.Contains(long.MinValue));

    try
    {
        ids.Add(long.MaxValue);
        Assert.Fail("Should have thrown IllegalArgumentException");
    }
    catch (ArgumentException)
    {
        // expected: the value is rejected
    }
    Assert.False(ids.Contains(long.MaxValue));
}
/// <summary>
/// Computes precision, recall, fall-out, nDCG and reach for the recommender produced by
/// <paramref name="recommenderBuilder"/>. For each sampled user a set of "relevant" items
/// is held out, a training model is built from the remaining data, and the top
/// <paramref name="at"/> recommendations are compared with the held-out items.
/// </summary>
/// <param name="recommenderBuilder">Builds the recommender under test from each training model.</param>
/// <param name="dataModelBuilder">Builds the training model; when null a <c>GenericDataModel</c> is used.</param>
/// <param name="dataModel">Full data set to evaluate on.</param>
/// <param name="rescorer">Passed through unchanged to <c>Recommend</c>.</param>
/// <param name="at">Number of recommendations requested per user.</param>
/// <param name="relevanceThreshold">Relevance cut-off; when NaN it is computed per user via <c>computeThreshold</c>.</param>
/// <param name="evaluationPercentage">A user is skipped when the random draw is greater than or equal to this value.</param>
/// <returns>
/// An <c>IRStatisticsImpl</c> carrying the running averages of the four metrics and the
/// ratio of users with at least one recommendation to users recommended for.
/// </returns>
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                             IDataModelBuilder dataModelBuilder,
                             IDataModel dataModel,
                             IDRescorer rescorer,
                             int at,
                             double relevanceThreshold,
                             double evaluationPercentage)
{
    // Argument validation is currently disabled; the intended checks were:
    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //    "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int totalItems = dataModel.GetNumItems();

    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int usersRecommendedFor = 0;
    int usersWithRecommendations = 0;

    var userIterator = dataModel.GetUserIDs();
    while (userIterator.MoveNext())
    {
        long userID = userIterator.Current;

        // Random sampling of the users to evaluate.
        if (random.nextDouble() >= evaluationPercentage)
        {
            continue;
        }

        var timer = new System.Diagnostics.Stopwatch();
        timer.Start();

        IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);

        // Pick the items that count as the (most) "relevant" results for this user.
        double effectiveThreshold;
        if (Double.IsNaN(relevanceThreshold))
        {
            effectiveThreshold = computeThreshold(prefs);
        }
        else
        {
            effectiveThreshold = relevanceThreshold;
        }

        FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, effectiveThreshold, dataModel);
        int relevantCount = relevantItemIDs.Count();
        if (relevantCount <= 0)
        {
            continue;
        }

        // Assemble the training data: every user's prefs, minus this user's relevant items.
        FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
        var otherUserIterator = dataModel.GetUserIDs();
        while (otherUserIterator.MoveNext())
        {
            dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, otherUserIterator.Current, dataModel);
        }

        IDataModel trainingModel;
        if (dataModelBuilder == null)
        {
            trainingModel = new GenericDataModel(trainingUsers);
        }
        else
        {
            trainingModel = dataModelBuilder.BuildDataModel(trainingUsers);
        }

        try
        {
            trainingModel.GetPreferencesFromUser(userID);
        }
        catch (NoSuchUserException)
        {
            // All prefs for the user ended up excluded -- just move on.
            continue;
        }

        int size = relevantCount + trainingModel.GetItemIDsFromUser(userID).Count();
        if (size < 2 * at)
        {
            // Really not enough prefs to meaningfully evaluate this user.
            continue;
        }

        IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);

        int hits = 0;
        var recommendedItems = recommender.Recommend(userID, at, rescorer);
        foreach (IRecommendedItem recommended in recommendedItems)
        {
            if (relevantItemIDs.Contains(recommended.GetItemID()))
            {
                hits++;
            }
        }

        int recommendedCount = recommendedItems.Count;

        // Precision
        if (recommendedCount > 0)
        {
            precision.AddDatum((double)hits / (double)recommendedCount);
        }

        // Recall
        recall.AddDatum((double)hits / (double)relevantCount);

        // Fall-out
        if (relevantCount < size)
        {
            double falsePositives = (double)(recommendedCount - hits);
            double nonRelevantItems = (double)(totalItems - relevantCount);
            fallOut.AddDatum(falsePositives / nonRelevantItems);
        }

        // nDCG: relevant IDs are treated as relevance 1 and all others as 0.
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int rank = 0; rank < recommendedCount; rank++)
        {
            IRecommendedItem item = recommendedItems[rank];

            // Classical formulation says log(i+1), but the rank is 0-based here.
            double discount = 1.0 / log2(rank + 2.0);

            if (relevantItemIDs.Contains(item.GetItemID()))
            {
                cumulativeGain += discount;
            }

            // The ideal ordering lists all relevant items first, so the ideal gain
            // accumulates the discount for the first relevantCount positions.
            if (rank < relevantCount)
            {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0)
        {
            nDCG.AddDatum(cumulativeGain / idealizedGain);
        }

        // Reach
        usersRecommendedFor++;
        if (recommendedCount > 0)
        {
            usersWithRecommendations++;
        }

        timer.Stop();

        log.Info("Evaluated with user {} in {}ms", userID, timer.ElapsedMilliseconds);
        log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
                 precision.GetAverage(),
                 recall.GetAverage(),
                 fallOut.GetAverage(),
                 nDCG.GetAverage(),
                 (double)usersWithRecommendations / (double)usersRecommendedFor);
    }

    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double)usersWithRecommendations / (double)usersRecommendedFor);
}