// Round-trips a small Factorization through FilePersistenceStrategy and
// verifies the reloaded clone equals the original.
public void persistAndLoad()
{
    // Build a tiny factorization: two users, two items, rank-3 features.
    FastByIDMap<int?> userIDMapping = new FastByIDMap<int?>();
    FastByIDMap<int?> itemIDMapping = new FastByIDMap<int?>();
    userIDMapping.Put(123, 0);
    userIDMapping.Put(456, 1);
    itemIDMapping.Put(12, 0);
    itemIDMapping.Put(34, 1);
    double[][] userFeatures = new double[][] { new double[] { 0.1, 0.2, 0.3 }, new double[] { 0.4, 0.5, 0.6 } };
    double[][] itemFeatures = new double[][] { new double[] { 0.7, 0.8, 0.9 }, new double[] { 1.0, 1.1, 1.2 } };
    Factorization original = new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);

    // FIX: use a unique temp file name. The previous fixed "storage.bin" in
    // the shared temp directory could collide with a leftover file from an
    // earlier or concurrent run, making the initial Load() assertion fail
    // spuriously.
    var storage = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
    try
    {
        IPersistenceStrategy persistenceStrategy = new FilePersistenceStrategy(storage);
        Assert.IsNull(persistenceStrategy.Load()); // nothing persisted yet
        persistenceStrategy.MaybePersist(original);
        Factorization clone = persistenceStrategy.Load();
        Assert.True(original.Equals(clone)); // round-trip must preserve equality
    }
    finally
    {
        // Best-effort cleanup; a deletion failure must not fail the test.
        if (File.Exists(storage))
        {
            try { File.Delete(storage); } catch { }
        }
    }
}
// Installs a small hand-written 4-user / 4-item rating fixture and the
// factorization hyper-parameters used by the toy tests.
public void setUpToyData()
{
    this.rank = 3;
    this.lambda = 0.01;
    this.numIterations = 1000;
    FastByIDMap<IPreferenceArray> toyRatings = new FastByIDMap<IPreferenceArray>();
    toyRatings.Put(1L, new GenericUserPreferenceArray(new List<IPreference>
    {
        new GenericPreference(1L, 1L, 5.0f),
        new GenericPreference(1L, 2L, 5.0f),
        new GenericPreference(1L, 3L, 2.0f),
    }));
    toyRatings.Put(2L, new GenericUserPreferenceArray(new List<IPreference>
    {
        new GenericPreference(2L, 1L, 2.0f),
        new GenericPreference(2L, 3L, 3.0f),
        new GenericPreference(2L, 4L, 5.0f),
    }));
    toyRatings.Put(3L, new GenericUserPreferenceArray(new List<IPreference>
    {
        new GenericPreference(3L, 2L, 5.0f),
        new GenericPreference(3L, 4L, 3.0f),
    }));
    toyRatings.Put(4L, new GenericUserPreferenceArray(new List<IPreference>
    {
        new GenericPreference(4L, 1L, 3.0f),
        new GenericPreference(4L, 4L, 5.0f),
    }));
    dataModel = new GenericDataModel(toyRatings);
}
// Populates the nested similarity maps from an item-item similarity stream.
// Each pair is stored once, keyed by the smaller item ID; both directions
// are indexed for similar-item lookups.
private void initSimilarityMaps(IEnumerator<ItemItemSimilarity> similarities)
{
    while (similarities.MoveNext())
    {
        ItemItemSimilarity sim = similarities.Current;
        long id1 = sim.getItemID1();
        long id2 = sim.getItemID2();
        if (id1 == id2)
        {
            // Self-similarity is implicit; nothing to store.
            continue;
        }
        // Canonical order: the smaller ID keys the outer map.
        long smaller = id1 < id2 ? id1 : id2;
        long larger = id1 < id2 ? id2 : id1;
        FastByIDMap<double?> row = this.similarityMaps.get(smaller);
        if (row == null)
        {
            row = new FastByIDMap<double?>();
            this.similarityMaps.put(smaller, row);
        }
        row.put(larger, new double?(sim.getValue()));
        // Index both directions so either item finds the other.
        this.doIndex(smaller, larger);
        this.doIndex(larger, smaller);
    }
}
// Precomputes item-item similarities for every item pair in the data model,
// delegating the actual similarity computation to otherSimilarity.
public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel)
{
    this.similarityMaps = new FastByIDMap<FastByIDMap<double?>>();
    this.similarItemIDsIndex = new FastByIDMap<FastIDSet>();
    long[] itemIDs = GenericUserSimilarity.longIteratorToList(dataModel.getItemIDs());
    // Iterates all pairs and fills similarityMaps / similarItemIDsIndex.
    this.initSimilarityMaps(new DataModelSimilaritiesIterator(otherSimilarity, itemIDs));
}
// Builds the user clusters: starts from one singleton cluster per user,
// merges them via findClusters(), then precomputes per-user top
// recommendations and cluster membership.
private void buildClusters()
{
    DataModel model = getDataModel();
    int numUsers = model.getNumUsers();
    if (numUsers > 0)
    {
        List<FastIDSet> newClusters = new List<FastIDSet>();
        // Begin with a cluster for each user:
        var it = model.getUserIDs();
        while (it.MoveNext())
        {
            FastIDSet newCluster = new FastIDSet();
            newCluster.add(it.Current);
            newClusters.Add(newCluster);
        }
        if (numUsers > 1)
        {
            // Merging only makes sense with at least two clusters.
            findClusters(newClusters);
        }
        topRecsByUserID = computeTopRecsPerUserID(newClusters);
        clustersByUserID = computeClustersPerUserID(newClusters);
        allClusters = newClusters.ToArray();
    }
    else
    {
        // No users: publish empty structures rather than leaving fields null.
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        allClusters = NO_CLUSTERS;
    }
}
// Builds the in-memory data model from per-user preference arrays: records
// the user->prefs view, derives the transposed item->prefs view, the sorted
// user/item ID arrays, and the observed min/max preference values.
public GenericDataModel(FastByIDMap<PreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    this.preferenceFromUsers = userData;
    FastByIDMap<List<Preference>> prefsForItems = new FastByIDMap<List<Preference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int processedUsers = 0;
    // maxPrefValue starts at -inf and only grows; minPrefValue at +inf and only shrinks.
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;
    foreach (KeyValuePair<long, PreferenceArray> entry in this.preferenceFromUsers.entrySet())
    {
        PreferenceArray prefs = entry.Value;
        prefs.sortByItem();
        foreach (Preference pref in prefs)
        {
            long itemID = pref.getItemID();
            itemIDSet.add(itemID);
            List<Preference> prefsForItem = prefsForItems.get(itemID);
            if (prefsForItem == null)
            {
                prefsForItem = new List<Preference>(2);
                prefsForItems.put(itemID, prefsForItem);
            }
            prefsForItem.Add(pref);
            float value = pref.getValue();
            if (value > maxPrefValue)
            {
                maxPrefValue = value;
            }
            if (value < minPrefValue)
            {
                minPrefValue = value;
            }
        }
        // Progress logging every 10,000 users (was the opaque hex 0x2710).
        if ((++processedUsers % 10000) == 0)
        {
            log.info("Processed {0} users", new object[] { processedUsers });
        }
    }
    log.info("Processed {0} users", new object[] { processedUsers });
    this.setMinPreference(minPrefValue);
    this.setMaxPreference(maxPrefValue);
    this.itemIDs = itemIDSet.toArray();
    itemIDSet = null; // might help GC -- this set is large
    Array.Sort<long>(this.itemIDs);
    this.preferenceForItems = toDataMap(prefsForItems, false);
    foreach (KeyValuePair<long, PreferenceArray> entry in this.preferenceForItems.entrySet())
    {
        entry.Value.sortByUser();
    }
    this.userIDs = new long[userData.size()];
    int next = 0;
    foreach (long id in userData.Keys)
    {
        this.userIDs[next++] = id;
    }
    Array.Sort<long>(this.userIDs);
    this.timestamps = timestamps;
}
// Symmetric item-item similarity lookup. Pairs are stored once, keyed by
// the smaller item ID; returns NaN when the pair was never stored.
public double itemSimilarity(long itemID1, long itemID2)
{
    // An item is always fully similar to itself.
    if (itemID1 == itemID2)
    {
        return 1.0;
    }
    long smaller = itemID1 < itemID2 ? itemID1 : itemID2;
    long larger = itemID1 < itemID2 ? itemID2 : itemID1;
    FastByIDMap<double?> row = this.similarityMaps.get(smaller);
    if (row == null)
    {
        return double.NaN;
    }
    double? stored = row.get(larger);
    return stored.HasValue ? stored.Value : double.NaN;
}
// Agglomeratively merges per-user singleton clusters until
// mergeClosestClusters reports completion, then precomputes per-user
// recommendations and cluster membership.
private void buildClusters()
{
    DataModel model = getDataModel();
    int numUsers = model.getNumUsers();
    if (numUsers == 0)
    {
        // No users: publish empty structures for ALL derived fields.
        // FIX: allClusters was previously left unassigned on this path
        // (unlike the sibling implementation), so later readers could see
        // a stale or null array.
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        allClusters = new FastIDSet[0];
    }
    else
    {
        List<FastIDSet> clusters = new List<FastIDSet>();
        // Begin with a cluster for each user:
        var it = model.getUserIDs();
        while (it.MoveNext())
        {
            FastIDSet newCluster = new FastIDSet();
            newCluster.add(it.Current);
            clusters.Add(newCluster);
        }
        // Keep merging until the closest-cluster pass says we're done.
        bool done = false;
        while (!done)
        {
            done = mergeClosestClusters(numUsers, clusters, done);
        }
        topRecsByUserID = computeTopRecsPerUserID(clusters);
        clustersByUserID = computeClustersPerUserID(clusters);
        allClusters = clusters.ToArray();
    }
}
// Copies one other user's preferences into the training set. When that other
// user IS the user under evaluation, the held-back "relevant" test items are
// filtered out first so the evaluator never trains on them.
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray prefs2Array = dataModel.GetPreferencesFromUser(otherUserID);
    // If we're dealing with the very user that we're evaluating for precision/recall,
    if (userID == otherUserID)
    {
        // then must remove all the test IDs, the "relevant" item IDs
        List<IPreference> prefs2 = new List<IPreference>(prefs2Array.Length());
        foreach (IPreference pref in prefs2Array)
        {
            if (!relevantItemIDs.Contains(pref.GetItemID()))
            {
                prefs2.Add(pref);
            }
        }
        // Only add the user at all if anything survives the filtering.
        if (prefs2.Count > 0)
        {
            trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(prefs2));
        }
    }
    else
    {
        // otherwise just add all those other user's prefs
        trainingUsers.Put(otherUserID, prefs2Array);
    }
}
// Anonymous users holding temp prefs must NOT appear in GetUserIDs();
// only the delegate model's users are enumerated.
public void testGetUserIDs()
{
    IPreferenceArray prefs = new GenericUserPreferenceArray(1);
    long sampleUserID = 1;
    prefs.SetUserID(0, sampleUserID);
    long sampleItemID = 11;
    prefs.SetItemID(0, sampleItemID);
    FastByIDMap<IPreferenceArray> delegatePreferences = new FastByIDMap<IPreferenceArray>();
    delegatePreferences.Put(sampleUserID, prefs);
    PlusAnonymousConcurrentUserDataModel instance = getTestableWithDelegateData(10, delegatePreferences);
    // Take an anonymous slot and give it a temporary preference.
    long anonymousUserID = instance.TakeAvailableUser().Value;
    IPreferenceArray tempPrefs = new GenericUserPreferenceArray(1);
    tempPrefs.SetUserID(0, anonymousUserID);
    tempPrefs.SetItemID(0, 22);
    instance.SetTempPrefs(tempPrefs, anonymousUserID);
    // Enumeration must yield exactly the one delegate user, nothing more.
    var userIDs = instance.GetUserIDs();
    userIDs.MoveNext();
    Assert.AreEqual(sampleUserID, userIDs.Current);
    Assert.False(userIDs.MoveNext());
}
private void pruneInconsequentialDiffs()
{
    // Go back and prune inconsequential diffs. "Inconsequential" means, here, only represented by one
    // data point, so possibly unreliable
    // Snapshot the entry sets with ToList() so that remove() below does not
    // invalidate the iteration.
    var it1 = averageDiffs.entrySet().ToList();
    for (int i = 0; i < it1.Count; i++)
    {
        FastByIDMap<RunningAverage> map = it1[i].Value;
        var it2 = map.entrySet().ToList();
        for (int j = 0; j < it2.Count; j++)
        {
            RunningAverage average = it2[j].Value;
            // Single-sample averages are dropped as unreliable.
            if (average.getCount() <= 1)
            {
                map.remove(it2[j].Key);
            }
        }
        if (map.isEmpty())
        {
            // The whole row became empty after pruning; drop it too.
            averageDiffs.remove(it1[i].Key);
        }
        else
        {
            // Compact the surviving row.
            map.rehash();
        }
    }
    averageDiffs.rehash();
}
// Removes one (user, itemIDA, prefValue) preference's contribution from the
// item-item diff averages, dropping any average that would be left with a
// single unreliable data point.
public void removeItemPref(long userID, long itemIDA, float prefValue)
{
    PreferenceArray userPreferences = dataModel.getPreferencesFromUser(userID);
    try
    {
        buildAverageDiffsLock.EnterWriteLock();
        FastByIDMap<RunningAverage> aMap = averageDiffs.get(itemIDA);
        int length = userPreferences.length();
        for (int i = 0; i < length; i++)
        {
            long itemIDB = userPreferences.getItemID(i);
            float bValue = userPreferences.getValue(i);
            if (itemIDA < itemIDB)
            {
                // Pair stored under itemIDA's row.
                if (aMap != null)
                {
                    RunningAverage average = aMap.get(itemIDB);
                    if (average != null)
                    {
                        if (average.getCount() <= 1)
                        {
                            aMap.remove(itemIDB);
                        }
                        else
                        {
                            average.removeDatum(bValue - prefValue);
                        }
                    }
                }
            }
            else if (itemIDA > itemIDB)
            {
                // Pair stored under itemIDB's row.
                FastByIDMap<RunningAverage> bMap = averageDiffs.get(itemIDB);
                if (bMap != null)
                {
                    RunningAverage average = bMap.get(itemIDA);
                    if (average != null)
                    {
                        if (average.getCount() <= 1)
                        {
                            // FIX: the single-count entry lives in bMap (keyed by
                            // itemIDB), so it must be removed from bMap, not aMap.
                            // The old aMap.remove(itemIDA) left a stale entry in
                            // bMap and could throw when aMap was null.
                            bMap.remove(itemIDA);
                        }
                        else
                        {
                            average.removeDatum(prefValue - bValue);
                        }
                    }
                }
            }
            // itemIDA == itemIDB: no diff is stored for an item against itself.
        }
    }
    finally
    {
        buildAverageDiffsLock.ExitWriteLock();
    }
}
/// <p>
/// Returns the similarity between two items. Note that similarity is assumed to be symmetric, that
/// {@code itemSimilarity(item1, item2) == itemSimilarity(item2, item1)}, and that
/// {@code itemSimilarity(item1,item1) == 1.0} for all items.
/// </p>
///
/// @param itemID1
///          first item
/// @param itemID2
///          second item
/// @return similarity between the two, or NaN when the pair was never stored
public double ItemSimilarity(long itemID1, long itemID2)
{
    if (itemID1 == itemID2)
    {
        return 1.0;
    }
    // Pairs are stored once, keyed by the smaller item ID.
    long lowID = itemID1 < itemID2 ? itemID1 : itemID2;
    long highID = itemID1 < itemID2 ? itemID2 : itemID1;
    FastByIDMap<double?> row = similarityMaps.Get(lowID);
    double? stored = row == null ? null : row.Get(highID);
    return stored ?? Double.NaN;
}
// SamplingCandidateItemsStrategy with sample sizes of 1 must return at most
// one candidate, and never an item the user has already rated.
public void testStrategy()
{
    List<IPreference> prefsOfUser123 = new List<IPreference>();
    prefsOfUser123.Add(new GenericPreference(123L, 1L, 1.0f));
    List<IPreference> prefsOfUser456 = new List<IPreference>();
    prefsOfUser456.Add(new GenericPreference(456L, 1L, 1.0f));
    prefsOfUser456.Add(new GenericPreference(456L, 2L, 1.0f));
    List<IPreference> prefsOfUser789 = new List<IPreference>();
    prefsOfUser789.Add(new GenericPreference(789L, 1L, 0.5f));
    prefsOfUser789.Add(new GenericPreference(789L, 3L, 1.0f));
    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);
    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    userData.Put(123L, prefArrayOfUser123);
    userData.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
    userData.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));
    IDataModel dataModel = new GenericDataModel(userData);
    ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, dataModel.GetNumUsers(), dataModel.GetNumItems());
    FastIDSet candidateItems = strategy.GetCandidateItems(123L, prefArrayOfUser123, dataModel);
    // At most one sampled candidate, and item 1 (already rated) is excluded.
    Assert.True(candidateItems.Count() <= 1);
    Assert.False(candidateItems.Contains(1L));
}
// Installs the shared 4-user / 4-item rating fixture and an ALSWR factorizer
// (3 features, lambda 0.065, 10 iterations) before each test.
public override void SetUp()
{
    base.SetUp();
    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    userData.Put(1L, new GenericUserPreferenceArray(new List<IPreference>() { new GenericPreference(1L, 1L, 5.0f), new GenericPreference(1L, 2L, 5.0f), new GenericPreference(1L, 3L, 2.0f) }));
    userData.Put(2L, new GenericUserPreferenceArray(new List<IPreference>() { new GenericPreference(2L, 1L, 2.0f), new GenericPreference(2L, 3L, 3.0f), new GenericPreference(2L, 4L, 5.0f) }));
    userData.Put(3L, new GenericUserPreferenceArray(new List<IPreference>() { new GenericPreference(3L, 2L, 5.0f), new GenericPreference(3L, 4L, 3.0f) }));
    userData.Put(4L, new GenericUserPreferenceArray(new List<IPreference>() { new GenericPreference(4L, 1L, 3.0f), new GenericPreference(4L, 4L, 5.0f) }));
    dataModel = new GenericDataModel(userData);
    factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
}
// Generates a synthetic data set: a dense low-rank ratings matrix
// (users x items) from random factors, normalized to a 5-point scale, then
// sampled so roughly `sparsity` of the cells become preferences.
public void setUpSyntheticData()
{
    int numUsers = 2000;
    int numItems = 1000;
    double sparsity = 0.5;
    this.rank = 20;
    this.lambda = 0.000000001;
    this.numIterations = 100;
    var users = randomMatrix(numUsers, rank, 1);
    var items = randomMatrix(rank, numItems, 1);
    // ratings = users x items, then scaled to at most 5.
    var ratings = times(users, items);
    normalize(ratings, 5);
    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    for (int userIndex = 0; userIndex < numUsers; userIndex++)
    {
        List<IPreference> row = new List<IPreference>();
        for (int itemIndex = 0; itemIndex < numItems; itemIndex++)
        {
            // Keep each cell with probability `sparsity`.
            if (random.nextDouble() <= sparsity)
            {
                row.Add(new GenericPreference(userIndex, itemIndex, (float)ratings[userIndex, itemIndex]));
            }
        }
        userData.Put(userIndex, new GenericUserPreferenceArray(row));
    }
    dataModel = new GenericDataModel(userData);
}
// Builds an in-memory data model from raw user-item records. Review-based
// models carry ratings (GenericPreference); otherwise boolean "liked"
// preferences are used.
internal static IDataModel BuildModel(IList<UserItem> userItems, bool isReviewBased)
{
    // First pass: bucket raw preferences per user.
    FastByIDMap<IList<IPreference>> prefsByUser = new FastByIDMap<IList<IPreference>>();
    foreach (var userItem in userItems)
    {
        var prefs = prefsByUser.Get(userItem.UserId);
        if (prefs == null)
        {
            prefs = new List<IPreference>(3);
            prefsByUser.Put(userItem.UserId, prefs);
        }
        if (isReviewBased)
        {
            prefs.Add(new GenericPreference(userItem.UserId, userItem.ItemId, userItem.Rating));
        }
        else
        {
            prefs.Add(new BooleanPreference(userItem.UserId, userItem.ItemId));
        }
    }
    // Second pass: freeze each bucket into the matching preference-array type.
    var userData = new FastByIDMap<IPreferenceArray>(prefsByUser.Count());
    foreach (var entry in prefsByUser.EntrySet())
    {
        var prefList = (List<IPreference>)entry.Value;
        IPreferenceArray array = isReviewBased
            ? new GenericUserPreferenceArray(prefList)
            : (IPreferenceArray)new BooleanUserPreferenceArray(prefList);
        userData.Put(entry.Key, array);
    }
    return new GenericDataModel(userData);
}
// Holds a completed matrix factorization: user/item ID -> row-index mappings
// plus the corresponding feature matrices (one feature row per mapped index).
public Factorization(FastByIDMap<int?> userIDMapping, FastByIDMap<int?> itemIDMapping, double[][] userFeatures, double[][] itemFeatures)
{
    this.userIDMapping = userIDMapping;
    this.itemIDMapping = itemIDMapping;
    this.userFeatures = userFeatures;
    this.itemFeatures = itemFeatures;
}
// Holds a completed matrix factorization: user/item ID -> row-index mappings
// plus the corresponding feature matrices (one feature row per mapped index).
public Factorization(FastByIDMap<int?> userIDMapping, FastByIDMap<int?> itemIDMapping, double[][] userFeatures, double[][] itemFeatures)
{
    // Restore the argument validation the commented-out Preconditions.checkNotNull
    // calls were standing in for: a null mapping would otherwise only surface
    // later as a NullReferenceException far from the cause.
    if (userIDMapping == null)
    {
        throw new ArgumentNullException("userIDMapping");
    }
    if (itemIDMapping == null)
    {
        throw new ArgumentNullException("itemIDMapping");
    }
    this.userIDMapping = userIDMapping;
    this.itemIDMapping = itemIDMapping;
    this.userFeatures = userFeatures;
    this.itemFeatures = itemFeatures;
}
// Symmetric user-user similarity lookup; pairs are stored once, keyed by
// the smaller user ID.
public double userSimilarity(long userID1, long userID2)
{
    long num;
    long num2;
    // A user is fully similar to itself.
    if (userID1 == userID2)
    {
        return (1.0);
    }
    // Canonicalize: num = smaller ID, num2 = larger.
    if (userID1 < userID2)
    {
        num = userID1;
        num2 = userID2;
    }
    else
    {
        num = userID2;
        num2 = userID1;
    }
    FastByIDMap<double> map = this.similarityMaps.get(num);
    if (map == null)
    {
        return (double.NaN);
    }
    // NOTE(review): the inner map holds non-nullable double, so a missing
    // (num, num2) pair presumably yields FastByIDMap's default value (likely
    // 0.0) instead of NaN, unlike the nullable-double item variant elsewhere
    // in this file — verify FastByIDMap.get's behavior for absent keys.
    return (map.get(num2));
}
// Builds a user-based data model from all stored book reviews:
// one GenericUserPreferenceArray per reviewing user.
public GenericDataModel GetUserBasedDataModel()
{
    FastByIDMap<IPreferenceArray> data = new FastByIDMap<IPreferenceArray>();
    IEnumerable<UserBookReview> allBookReviews = _userBookReviewRepository.GetListOf();
    // FIX: group the reviews once instead of re-filtering (and potentially
    // re-enumerating the repository query) the entire review sequence for
    // every distinct user id, which was O(users * reviews).
    foreach (var reviewsForUser in allBookReviews.GroupBy(r => r.UserId))
    {
        int userId = reviewsForUser.Key;
        List<IPreference> listOfPreferences = new List<IPreference>();
        foreach (UserBookReview review in reviewsForUser)
        {
            // preference = (userId, itemId = bookId, value = rating)
            listOfPreferences.Add(new GenericPreference(userId, review.BookId, review.Rating));
        }
        data.Put(userId, new GenericUserPreferenceArray(listOfPreferences));
    }
    return new GenericDataModel(data);
}
// Populates the nested user-user similarity maps; each pair is stored once,
// keyed by the smaller user ID. Self-similarities are skipped (implicitly 1.0).
private void initSimilarityMaps(IEnumerator<UserUserSimilarity> similarities)
{
    while (similarities.MoveNext())
    {
        UserUserSimilarity current = similarities.Current;
        long num = current.getUserID1();
        long num2 = current.getUserID2();
        if (num != num2)
        {
            // num3 = smaller ID, num4 = larger ID.
            long num3;
            long num4;
            if (num < num2)
            {
                num3 = num;
                num4 = num2;
            }
            else
            {
                num3 = num2;
                num4 = num;
            }
            FastByIDMap<double> map = this.similarityMaps.get(num3);
            if (map == null)
            {
                map = new FastByIDMap<double>();
                this.similarityMaps.put(num3, map);
            }
            map.put(num4, current.getValue());
        }
    }
}
// Round-trips a small Factorization through FilePersistenceStrategy and
// checks the reloaded clone equals the original.
public void persistAndLoad()
{
    // Two users, two items, rank-3 features.
    FastByIDMap<int?> userIDMapping = new FastByIDMap<int?>();
    FastByIDMap<int?> itemIDMapping = new FastByIDMap<int?>();
    userIDMapping.Put(123, 0);
    userIDMapping.Put(456, 1);
    itemIDMapping.Put(12, 0);
    itemIDMapping.Put(34, 1);
    double[][] userFeatures = new double[][] { new double[] { 0.1, 0.2, 0.3 }, new double[] { 0.4, 0.5, 0.6 } };
    double[][] itemFeatures = new double[][] { new double[] { 0.7, 0.8, 0.9 }, new double[] { 1.0, 1.1, 1.2 } };
    Factorization original = new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);
    // NOTE(review): a fixed file name in the shared temp directory can collide
    // with leftovers from earlier runs, making the first Load() assertion fail
    // spuriously; consider a unique (random) name.
    var storage = Path.Combine(Path.GetTempPath(), "storage.bin");
    try
    {
        IPersistenceStrategy persistenceStrategy = new FilePersistenceStrategy(storage);
        // Nothing persisted yet.
        Assert.IsNull(persistenceStrategy.Load());
        persistenceStrategy.MaybePersist(original);
        Factorization clone = persistenceStrategy.Load();
        // Round-trip must preserve equality.
        Assert.True(original.Equals(clone));
    }
    finally
    {
        // Best-effort cleanup; deletion failure must not fail the test.
        if (File.Exists(storage))
        {
            try { File.Delete(storage); } catch { }
        }
    }
}
// SamplingCandidateItemsStrategy with sample sizes of 1 must return at most
// one candidate and never an item the user has already rated.
public void testStrategy()
{
    // Three users; user 123 has rated only item 1.
    List<IPreference> user123Prefs = new List<IPreference>
    {
        new GenericPreference(123L, 1L, 1.0f),
    };
    List<IPreference> user456Prefs = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f),
    };
    List<IPreference> user789Prefs = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f),
    };
    IPreferenceArray user123Array = new GenericUserPreferenceArray(user123Prefs);
    FastByIDMap<IPreferenceArray> allUserData = new FastByIDMap<IPreferenceArray>();
    allUserData.Put(123L, user123Array);
    allUserData.Put(456L, new GenericUserPreferenceArray(user456Prefs));
    allUserData.Put(789L, new GenericUserPreferenceArray(user789Prefs));
    IDataModel dataModel = new GenericDataModel(allUserData);
    ICandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1, 1, dataModel.GetNumUsers(), dataModel.GetNumItems());
    FastIDSet candidateItems = strategy.GetCandidateItems(123L, user123Array, dataModel);
    // At most one sampled candidate, and item 1 (already rated) is excluded.
    Assert.True(candidateItems.Count() <= 1);
    Assert.False(candidateItems.Contains(1L));
}
// Generates a synthetic rating set: a dense low-rank matrix from random
// factors, normalized to a 5-point scale, then sampled at ~50% density.
public void setUpSyntheticData()
{
    int numUsers = 2000;
    int numItems = 1000;
    double sparsity = 0.5;
    this.rank = 20;
    this.lambda = 0.000000001;
    this.numIterations = 100;
    var userFactors = randomMatrix(numUsers, rank, 1);
    var itemFactors = randomMatrix(rank, numItems, 1);
    var ratings = times(userFactors, itemFactors);
    normalize(ratings, 5);
    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    for (int u = 0; u < numUsers; u++)
    {
        List<IPreference> prefsOfUser = new List<IPreference>();
        for (int i = 0; i < numItems; i++)
        {
            // Keep each cell with probability `sparsity`.
            if (random.nextDouble() <= sparsity)
            {
                prefsOfUser.Add(new GenericPreference(u, i, (float)ratings[u, i]));
            }
        }
        userData.Put(u, new GenericUserPreferenceArray(prefsOfUser));
    }
    dataModel = new GenericDataModel(userData);
}
// Populates the nested user-user similarity maps from a similarity stream.
// Each pair is stored exactly once, keyed by the smaller user ID.
private void initSimilarityMaps(IEnumerator<UserUserSimilarity> similarities)
{
    while (similarities.MoveNext())
    {
        UserUserSimilarity uuc = similarities.Current;
        long similarityUser1 = uuc.getUserID1();
        long similarityUser2 = uuc.getUserID2();
        if (similarityUser1 != similarityUser2)
        {
            // Order them -- first key should be the "smaller" one
            long user1;
            long user2;
            if (similarityUser1 < similarityUser2)
            {
                user1 = similarityUser1;
                user2 = similarityUser2;
            }
            else
            {
                user1 = similarityUser2;
                user2 = similarityUser1;
            }
            FastByIDMap<Double> map = similarityMaps.Get(user1);
            if (map == null)
            {
                map = new FastByIDMap<Double>();
                similarityMaps.Put(user1, map);
            }
            map.Put(user2, uuc.getValue());
        } // else similarity between user and itself already assumed to be 1.0
    }
}
// Symmetric user-user similarity lookup; pairs are stored once, keyed by
// the smaller user ID.
public double UserSimilarity(long userID1, long userID2)
{
    // A user is fully similar to itself.
    if (userID1 == userID2)
    {
        return (1.0);
    }
    long first;
    long second;
    if (userID1 < userID2)
    {
        first = userID1;
        second = userID2;
    }
    else
    {
        first = userID2;
        second = userID1;
    }
    FastByIDMap<Double> nextMap = similarityMaps.Get(first);
    if (nextMap == null)
    {
        return (Double.NaN);
    }
    Double similarity = nextMap.Get(second);
    // NOTE(review): Double here is the non-nullable System.Double, so
    // `similarity == null` is always false and the NaN fallback below is
    // dead code — a missing pair presumably comes back as the map's default
    // value (likely 0.0) instead of NaN. Compare the nullable double? item
    // variant elsewhere in this file; verify FastByIDMap.Get's behavior.
    return (similarity == null ? Double.NaN : similarity);
}
// Holds a completed matrix factorization: user/item ID -> row-index mappings
// plus the corresponding feature matrices (one feature row per mapped index).
public Factorization(FastByIDMap<int?> userIDMapping, FastByIDMap<int?> itemIDMapping, double[][] userFeatures, double[][] itemFeatures)
{
    // NOTE(review): the commented-out Preconditions calls suggest null checks
    // were intended here; a null argument currently surfaces later as a
    // NullReferenceException far from the cause.
    this.userIDMapping = userIDMapping; //Preconditions.checkNotNull(
    this.itemIDMapping = itemIDMapping; //Preconditions.checkNotNull();
    this.userFeatures = userFeatures;
    this.itemFeatures = itemFeatures;
}
// Precomputes user-user similarities via the delegate similarity, keeping
// only the strongest maxToKeep pairs.
public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
{
    this.similarityMaps = new FastByIDMap<FastByIDMap<double>>();
    // These are user IDs (the original local was misleadingly named itemIDs).
    long[] userIDs = longIteratorToList(dataModel.getUserIDs());
    IEnumerator<UserUserSimilarity> allSimilarities = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
    this.initSimilarityMaps(TopItems.getTopUserUserSimilarities(maxToKeep, allSimilarities).GetEnumerator());
}
// Assigns consecutive indices (0, 1, 2, ...) to the IDs in iteration order,
// presizing the map to `size`.
private static FastByIDMap<int?> createIDMapping(int size, IEnumerator<long> idIterator)
{
    var mapping = new FastByIDMap<int?>(size);
    int nextIndex = 0;
    while (idIterator.MoveNext())
    {
        mapping.Put(idIterator.Current, nextIndex);
        nextIndex++;
    }
    return mapping;
}
// Exercises a map constructed with (1, 1) — presumably initial capacity 1 and
// max size 1, so inserting a second entry evicts the first; verify against
// the FastByIDMap constructor.
public void testGrow()
{
    FastByIDMap<String> map = new FastByIDMap<String>(1, 1);
    map.Put(500000L, "alpha");
    map.Put(47L, "bang");
    // The earlier key is gone and only the newer entry remains.
    Assert.IsNull(map.Get(500000L));
    Assert.AreEqual("bang", map.Get(47L));
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences). This
/// <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
/// </summary>
/// <param name="userData">users to include; (see also <see cref="GenericDataModel.ToDataMap(FastByIDMap, bool)"/>)</param>
/// <param name="timestamps">timestamps optionally, provided timestamps of preferences as milliseconds since the epoch. User IDs are mapped to maps of item IDs to long timestamps.</param>
public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;
    // Build the transposed item -> preferences view while scanning users once,
    // also collecting the item-ID set and the global preference-value range.
    FastByIDMap<IList<IPreference>> prefsForItems = new FastByIDMap<IList<IPreference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int currentCount = 0;
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;
    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        IPreferenceArray prefs = entry.Value;
        prefs.SortByItem();
        foreach (IPreference preference in prefs)
        {
            long itemID = preference.GetItemID();
            itemIDSet.Add(itemID);
            var prefsForItem = prefsForItems.Get(itemID);
            if (prefsForItem == null)
            {
                prefsForItem = new List<IPreference>(2);
                prefsForItems.Put(itemID, prefsForItem);
            }
            prefsForItem.Add(preference);
            float value = preference.GetValue();
            if (value > maxPrefValue)
            {
                maxPrefValue = value;
            }
            if (value < minPrefValue)
            {
                minPrefValue = value;
            }
        }
        // Progress logging every 10,000 users.
        if (++currentCount % 10000 == 0)
        {
            log.Info("Processed {0} users", currentCount);
        }
    }
    log.Info("Processed {0} users", currentCount);
    setMinPreference(minPrefValue);
    setMaxPreference(maxPrefValue);
    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);
    this.preferenceForItems = ToDataMap(prefsForItems, false);
    // Item rows must be ordered by user for later binary lookups.
    foreach (var entry in preferenceForItems.EntrySet())
    {
        entry.Value.SortByUser();
    }
    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (var v in userData.Keys)
    {
        userIDs[i++] = v;
    }
    Array.Sort(userIDs);
    this.timestamps = timestamps;
}
// Recommender that scores items via per-item running averages; builds the
// averages eagerly and rebuilds them whenever the data model is refreshed.
public ItemAverageRecommender(IDataModel dataModel) : base(dataModel)
{
    this.itemAverages = new FastByIDMap<IRunningAverage>();
    // On refresh, recompute the averages after the data model refreshes.
    this.refreshHelper = new RefreshHelper(() => { buildAverageDiffs(); });
    refreshHelper.AddDependency(dataModel);
    buildAverageDiffs();
}
// Drops the per-item timestamp for this user, if the user has any
// timestamps recorded at all.
private static void removeTimestamp(long userID, long itemID, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    FastByIDMap<DateTime?> userTimestamps = timestamps.get(userID);
    if (userTimestamps != null)
    {
        userTimestamps.remove(itemID);
    }
}
// Precomputes item-item similarities from the delegate similarity, keeping
// only the top maxToKeep most-similar pairs.
public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
{
    this.similarityMaps = new FastByIDMap<FastByIDMap<double?>>();
    this.similarItemIDsIndex = new FastByIDMap<FastIDSet>();
    long[] itemIDs = GenericUserSimilarity.longIteratorToList(dataModel.getItemIDs());
    DataModelSimilaritiesIterator allSimilarities = new DataModelSimilaritiesIterator(otherSimilarity, itemIDs);
    // Only the strongest maxToKeep pairs are retained.
    this.initSimilarityMaps(TopItems.getTopItemItemSimilarities(maxToKeep, allSimilarities).GetEnumerator());
}
// Parses one diff-file line ("item1 item2 diff [count [mk sk]]") into the
// averageDiffs structure, storing at most maxEntries averages while always
// registering both item IDs as recommendable. Returns the updated count of
// stored entries.
private long processLine(String line, long averageCount)
{
    // Skip blank lines and comments.
    if (string.IsNullOrEmpty(line) || line[0] == COMMENT_CHAR)
    {
        return (averageCount);
    }
    String[] tokens = SEPARATOR.Split(line);
    // FIX: restore the validation the commented-out precondition expressed.
    // Valid lines carry 3 tokens (item1, item2, diff), 4 (+count) or 6
    // (+mk, +sk). Without this check a 5-token line sets hasMkSk and
    // tokens[5] below throws IndexOutOfRangeException.
    if (tokens.Length < 3 || tokens.Length == 5)
    {
        throw new ArgumentException("Bad line: " + line);
    }
    long itemID1 = long.Parse(tokens[0]);
    long itemID2 = long.Parse(tokens[1]);
    double diff = double.Parse(tokens[2]);
    int count = tokens.Length >= 4 ? int.Parse(tokens[3]) : 1;
    bool hasMkSk = tokens.Length >= 5;
    // Canonical order: smaller item ID first.
    if (itemID1 > itemID2)
    {
        long temp = itemID1;
        itemID1 = itemID2;
        itemID2 = temp;
    }
    FastByIDMap<RunningAverage> level1Map = averageDiffs.get(itemID1);
    if (level1Map == null)
    {
        level1Map = new FastByIDMap<RunningAverage>();
        averageDiffs.put(itemID1, level1Map);
    }
    RunningAverage average = level1Map.get(itemID2);
    if (average != null)
    {
        throw new Exception("Duplicated line for item-item pair " + itemID1 + " / " + itemID2);
    }
    // Cap the number of stored averages; later lines still register the IDs
    // as recommendable below.
    if (averageCount < maxEntries)
    {
        if (hasMkSk)
        {
            double mk = Double.Parse(tokens[4]);
            double sk = Double.Parse(tokens[5]);
            average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
        }
        else
        {
            average = new FullRunningAverage(count, diff);
        }
        level1Map.put(itemID2, average);
        averageCount++;
    }
    allRecommendableItemIDs.add(itemID1);
    allRecommendableItemIDs.add(itemID2);
    return (averageCount);
}
// After Clear() the map must report empty and previously stored keys
// must resolve to null.
public void testClear()
{
    FastByIDMap<long?> underTest = new FastByIDMap<long?>();
    underTest.Put(500000L, 2L);
    underTest.Clear();
    Assert.AreEqual(0, underTest.Count());
    Assert.True(underTest.IsEmpty());
    Assert.IsNull(underTest.Get(500000L));
}
// Rebuilds the long -> string mapping if no other thread is already doing
// so; contended callers simply skip the reload.
private void reload()
{
    if (reloadLock.tryLock())
    {
        try
        {
            longToString = buildMapping();
        }
        catch (IOException ioe)
        {
            // FIX: InvalidOperationException has no (Exception) constructor, so
            // `new InvalidOperationException(ioe)` did not compile. Wrap the
            // original exception as the inner exception to preserve the cause.
            throw new InvalidOperationException(ioe.Message, ioe);
        }
        finally
        {
            reloadLock.unlock();
        }
    }
}
// Adds one observation to the item's running average, lazily creating the
// average on first sight of the item.
private static void addDatumAndCreateIfNeeded(long itemID, float value, FastByIDMap<IRunningAverage> averages)
{
    IRunningAverage average = averages.Get(itemID);
    if (average == null)
    {
        average = new FullRunningAverage();
        averages.Put(itemID, average);
    }
    average.AddDatum(value);
}
// Count() must grow with distinct keys and stay flat when an existing key
// is overwritten.
public void testMaxSize()
{
    FastByIDMap<String> underTest = new FastByIDMap<String>();
    underTest.Put(4, "bang");
    Assert.AreEqual(1, underTest.Count());
    underTest.Put(47L, "bang");
    Assert.AreEqual(2, underTest.Count());
    Assert.IsNull(underTest.Get(500000L));
    // Overwriting key 47 replaces the value without changing the size.
    underTest.Put(47L, "buzz");
    Assert.AreEqual(2, underTest.Count());
    Assert.AreEqual("buzz", underTest.Get(47L));
}
// Loads a long -> string ID mapping from dataFile and re-reads it on demand
// (throttled by minReloadIntervalMS).
// NOTE(review): this block is Java-flavored (java.io.File-style API,
// Preconditions, ReentrantLock) — presumably compiled against matching shim
// types elsewhere in the project; confirm.
public FileIDMigrator(File dataFile, long minReloadIntervalMS)
{
    longToString = new FastByIDMap<String>(100);
    this.dataFile = Preconditions.checkNotNull(dataFile);
    // The mapping source must exist and be a regular file.
    if (!dataFile.exists() || dataFile.isDirectory())
    {
        throw new FileNotFoundException(dataFile.toString());
    }
    // NOTE(review): "{}" is an SLF4J-style placeholder; other log calls in
    // this codebase use "{0}" — confirm which format this logger expects.
    log.info("Creating FileReadonlyIDMigrator for file {}", dataFile);
    this.reloadLock = new ReentrantLock();
    this.lastModified = dataFile.lastModified();
    this.minReloadIntervalMS = minReloadIntervalMS;
    // Eagerly populate the mapping.
    reload();
}
// With one delegate user and no anonymous slots taken, GetNumUsers() must
// count only the delegate user.
public void testGetNumUsersWithDelegateUsersOnly()
{
    IPreferenceArray singlePref = new GenericUserPreferenceArray(1);
    long realUserID = 1;
    singlePref.SetUserID(0, realUserID);
    long realItemID = 11;
    singlePref.SetItemID(0, realItemID);
    FastByIDMap<IPreferenceArray> delegateData = new FastByIDMap<IPreferenceArray>();
    delegateData.Put(realUserID, singlePref);
    PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, delegateData);
    Assert.AreEqual(1, model.GetNumUsers());
}
// Builds a data model from a nullable-rating matrix: item ID = column index,
// null cells are skipped, and users with no ratings are omitted entirely.
public static IDataModel getDataModel(long[] userIDs, double?[][] prefValues)
{
    FastByIDMap<IPreferenceArray> userData = new FastByIDMap<IPreferenceArray>();
    for (int u = 0; u < userIDs.Length; u++)
    {
        List<IPreference> prefs = new List<IPreference>();
        for (int item = 0; item < prefValues[u].Length; item++)
        {
            double? value = prefValues[u][item];
            if (value.HasValue)
            {
                prefs.Add(new GenericPreference(userIDs[u], item, (float)value.Value));
            }
        }
        if (prefs.Count > 0)
        {
            userData.Put(userIDs[u], new GenericUserPreferenceArray(prefs));
        }
    }
    return new GenericDataModel(userData);
}
// Builds a boolean-preference model from a flag matrix: item ID = column
// index; users with no true cells are omitted entirely.
public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
{
    FastByIDMap<FastIDSet> userData = new FastByIDMap<FastIDSet>();
    for (int u = 0; u < userIDs.Length; u++)
    {
        FastIDSet likedItems = new FastIDSet();
        for (int item = 0; item < prefs[u].Length; item++)
        {
            if (prefs[u][item])
            {
                likedItems.Add(item);
            }
        }
        if (!likedItems.IsEmpty())
        {
            userData.Put(userIDs[u], likedItems);
        }
    }
    return new GenericBooleanPrefDataModel(userData);
}
// Copies one other user's preferences into the training set. For the user
// under evaluation, the held-back "relevant" test items are stripped first
// so the evaluator never trains on them.
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray otherPrefs = dataModel.GetPreferencesFromUser(otherUserID);
    if (userID != otherUserID)
    {
        // Not the evaluated user: keep all preferences as training data.
        trainingUsers.Put(otherUserID, otherPrefs);
        return;
    }
    // Evaluated user: retain only preferences outside the relevant set.
    List<IPreference> retained = new List<IPreference>(otherPrefs.Length());
    foreach (IPreference pref in otherPrefs)
    {
        if (!relevantItemIDs.Contains(pref.GetItemID()))
        {
            retained.Add(pref);
        }
    }
    if (retained.Count > 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(retained));
    }
}
/// <p>
/// Creates a new {@link GenericDataModel} from the given users (and their preferences). This
/// {@link DataModel} retains all this information in memory and is effectively immutable.
/// </p>
///
/// @param userData users to include
/// @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch.
///                   User IDs are mapped to maps of item IDs to long timestamps.
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;
    // Build the transposed item -> users view while collecting all item IDs.
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();
    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);
        var it = itemIDs1.GetEnumerator();
        while (it.MoveNext())
        {
            long itemID = it.Current;
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }
    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);
    // Sorted user-ID array for binary lookups.
    this.userIDs = new long[userData.Count()];
    int i = 0;
    var it1 = userData.Keys.GetEnumerator();
    while (it1.MoveNext())
    {
        userIDs[i++] = it1.Current;
    }
    Array.Sort(userIDs);
    this.timestamps = timestamps;
}
// Builds a boolean-preference model from rated training data. Boolean models
// ignore preference values, so the preference arrays are collapsed into
// plain item-ID sets first.
public IDataModel BuildDataModel(FastByIDMap<IPreferenceArray> trainingData)
{
    var booleanData = GenericBooleanPrefDataModel.toDataMap(trainingData);
    return new GenericBooleanPrefDataModel(booleanData);
}
/// <summary>
/// Converts preference arrays (which carry rating values) into bare item-ID sets,
/// dropping the values. Used to feed boolean-preference data models.
/// </summary>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data)
{
    var result = new FastByIDMap<FastIDSet>(data.Count());
    foreach (var entry in data.EntrySet())
    {
        IPreferenceArray prefs = entry.Value;
        int count = prefs.Length();
        // Presize the set to avoid rehashing.
        var ids = new FastIDSet(count);
        for (int idx = 0; idx < count; idx++)
        {
            ids.Add(prefs.GetItemID(idx));
        }
        result.Put(entry.Key, ids);
    }
    return result;
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users (and their preferences). This
/// <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
/// </summary>
/// <param name="userData">users to include, mapped from user ID to the set of item IDs they prefer</param>
// Delegates to the two-argument constructor with no timestamps.
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData)
    : this(userData, null)
{
}
/// <summary>
/// Builds a tiny 4-user / 4-item rating matrix and an ALS-WR factorizer
/// (3 features, lambda 0.065, 10 iterations) for the tests in this fixture.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    var userData = new FastByIDMap<IPreferenceArray>();
    var user1Prefs = new List<IPreference>
    {
        new GenericPreference(1L, 1L, 5.0f),
        new GenericPreference(1L, 2L, 5.0f),
        new GenericPreference(1L, 3L, 2.0f),
    };
    var user2Prefs = new List<IPreference>
    {
        new GenericPreference(2L, 1L, 2.0f),
        new GenericPreference(2L, 3L, 3.0f),
        new GenericPreference(2L, 4L, 5.0f),
    };
    var user3Prefs = new List<IPreference>
    {
        new GenericPreference(3L, 2L, 5.0f),
        new GenericPreference(3L, 4L, 3.0f),
    };
    var user4Prefs = new List<IPreference>
    {
        new GenericPreference(4L, 1L, 3.0f),
        new GenericPreference(4L, 4L, 5.0f),
    };
    userData.Put(1L, new GenericUserPreferenceArray(user1Prefs));
    userData.Put(2L, new GenericUserPreferenceArray(user2Prefs));
    userData.Put(3L, new GenericUserPreferenceArray(user3Prefs));
    userData.Put(4L, new GenericUserPreferenceArray(user4Prefs));
    dataModel = new GenericDataModel(userData);
    factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
}
/// <summary>
/// Reads every line of the backing data file and maps its hashed long ID back to the
/// original string, recording the file's modification time so staleness can be detected.
/// </summary>
/// <returns>map from hashed long ID to the original line/string</returns>
private FastByIDMap<String> buildMapping()
{
    FastByIDMap<String> mapping = new FastByIDMap<String>();
    // Fixed: original was untranslated Java ("for (String line : ...)" and lowercase
    // "put"), which is not valid C#; converted to foreach and the Put convention
    // used throughout this file.
    foreach (String line in new FileLineIterable(dataFile))
    {
        mapping.Put(tolongID(line), line);
    }
    // NOTE(review): "lastModified()" is Java's File API; confirm the C# type of
    // dataFile exposes this member (FileInfo would use LastWriteTime) — TODO verify.
    lastModified = dataFile.lastModified();
    return mapping;
}
/// <summary>
/// Builds a small three-entry map used as a shared fixture by the FastByIDMap tests.
/// </summary>
private static FastByIDMap<String> buildTestFastMap()
{
    var fixture = new FastByIDMap<String>();
    fixture.Put(500000L, "alpha");
    fixture.Put(47L, "bang");
    fixture.Put(2L, "beta");
    return fixture;
}
/// <summary>
/// Evaluates a recommender's prediction quality: randomly samples users (per
/// <paramref name="evaluationPercentage"/>), splits each sampled user's preferences into
/// training/test portions (per <paramref name="trainingPercentage"/>), builds a recommender
/// on the training data, and scores its estimates against the held-out test preferences.
/// </summary>
/// <param name="recommenderBuilder">builds the recommender under evaluation</param>
/// <param name="dataModelBuilder">optional; builds the training data model (null means GenericDataModel)</param>
/// <param name="dataModel">source of all users and preferences</param>
/// <param name="trainingPercentage">fraction of each sampled user's prefs used for training</param>
/// <param name="evaluationPercentage">fraction of users sampled into the evaluation</param>
/// <returns>the score computed by the subclass via getEvaluation (lower is better for difference-based evaluators)</returns>
public virtual double Evaluate(IRecommenderBuilder recommenderBuilder,
                               IDataModelBuilder dataModelBuilder,
                               IDataModel dataModel,
                               double trainingPercentage,
                               double evaluationPercentage)
{
    //Preconditions.checkNotNull(recommenderBuilder);
    //Preconditions.checkNotNull(dataModel);
    //Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
    //  "Invalid trainingPercentage: " + trainingPercentage + ". Must be: 0.0 <= trainingPercentage <= 1.0");
    //Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
    //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 <= evaluationPercentage <= 1.0");
    log.Info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);
    int numUsers = dataModel.GetNumUsers();
    // Presize both maps for the expected number of sampled users.
    FastByIDMap<IPreferenceArray> trainingPrefs = new FastByIDMap<IPreferenceArray>(
        1 + (int) (evaluationPercentage * numUsers));
    FastByIDMap<IPreferenceArray> testPrefs = new FastByIDMap<IPreferenceArray>(
        1 + (int) (evaluationPercentage * numUsers));
    var it = dataModel.GetUserIDs();
    while (it.MoveNext())
    {
        long userID = it.Current;
        // Each user is sampled independently with probability evaluationPercentage.
        if (random.nextDouble() < evaluationPercentage)
        {
            splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
        }
    }
    IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingPrefs)
        : dataModelBuilder.BuildDataModel(trainingPrefs);
    IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);
    double result = getEvaluation(testPrefs, recommender);
    log.Info("Evaluation result: {}", result);
    return result;
}
/// <summary>
/// Builds one deferred estimation job per test user, runs them via <c>execute</c>, and
/// returns the final aggregate score. Each job estimates the user's held-out preferences,
/// feeding successful estimates to <c>processOneEstimate</c> and counting failures in
/// <paramref name="noEstimateCounter"/>-style shared state.
/// </summary>
/// <param name="testPrefs">held-out preferences, keyed by user ID</param>
/// <param name="recommender">recommender trained on the complementary training data</param>
/// <returns>the aggregate evaluation computed by computeFinalEvaluation()</returns>
private double getEvaluation(FastByIDMap<IPreferenceArray> testPrefs, IRecommender recommender)
{
    reset();
    var estimateCallables = new List<Action>();
    AtomicInteger noEstimateCounter = new AtomicInteger();
    foreach (var entry in testPrefs.EntrySet())
    {
        // Copy to a per-iteration local so each lambda captures its own entry
        // (identical under C# 5+ foreach semantics, and safe under older compilers
        // where the loop variable was shared across iterations).
        var testEntry = entry;
        estimateCallables.Add(() =>
        {
            var testUserID = testEntry.Key;
            var prefs = testEntry.Value;
            foreach (IPreference realPref in prefs)
            {
                float estimatedPreference = float.NaN;
                try
                {
                    estimatedPreference = recommender.EstimatePreference(testUserID, realPref.GetItemID());
                }
                catch (NoSuchUserException)
                {
                    // It's possible that an item exists in the test data but not training data in which case
                    // NSEE will be thrown. Just ignore it and move on.
                    log.Info("User exists in test data but not training data: {}", testUserID);
                }
                catch (NoSuchItemException)
                {
                    log.Info("Item exists in test data but not training data: {}", realPref.GetItemID());
                }
                if (float.IsNaN(estimatedPreference))
                {
                    noEstimateCounter.incrementAndGet();
                }
                else
                {
                    // Clamp to the model's valid preference range before scoring.
                    estimatedPreference = capEstimatedPreference(estimatedPreference);
                    processOneEstimate(estimatedPreference, realPref);
                }
            }
        });
        // new PreferenceEstimateCallable(recommender, entry.Key, entry.Value, noEstimateCounter));
    }
    log.Info("Beginning evaluation of {} users", estimateCallables.Count);
    IRunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
    execute(estimateCallables, noEstimateCounter, timing);
    return computeFinalEvaluation();
}
/// <summary>
/// Randomly partitions one user's preferences: each preference goes to the training set
/// with probability <paramref name="trainingPercentage"/>, otherwise to the test set.
/// </summary>
/// <remarks>
/// Test preferences are only recorded when the user also ended up with at least one
/// training preference — a user with no training data cannot be evaluated at all.
/// </remarks>
private void splitOneUsersPrefs(double trainingPercentage,
                                FastByIDMap<IPreferenceArray> trainingPrefs,
                                FastByIDMap<IPreferenceArray> testPrefs,
                                long userID,
                                IDataModel dataModel)
{
    // Lists are created lazily; either may stay null if no preference lands in it.
    List<IPreference> oneUserTrainingPrefs = null;
    List<IPreference> oneUserTestPrefs = null;
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
    int size = prefs.Length();
    for (int i = 0; i < size; i++)
    {
        // Each preference is copied into a new GenericPreference rather than shared
        // with the source model's array.
        IPreference newPref = new GenericPreference(userID, prefs.GetItemID(i), prefs.GetValue(i));
        if (random.nextDouble() < trainingPercentage)
        {
            if (oneUserTrainingPrefs == null)
            {
                oneUserTrainingPrefs = new List<IPreference>(3);
            }
            oneUserTrainingPrefs.Add(newPref);
        }
        else
        {
            if (oneUserTestPrefs == null)
            {
                oneUserTestPrefs = new List<IPreference>(3);
            }
            oneUserTestPrefs.Add(newPref);
        }
    }
    if (oneUserTrainingPrefs != null)
    {
        trainingPrefs.Put(userID, new GenericUserPreferenceArray(oneUserTrainingPrefs));
        if (oneUserTestPrefs != null)
        {
            testPrefs.Put(userID, new GenericUserPreferenceArray(oneUserTestPrefs));
        }
    }
}
/// <summary>
/// Verifies that Count() and IsEmpty() track additions and removals correctly.
/// </summary>
public void testSizeEmpty()
{
    var map = new FastByIDMap<long>();
    // A fresh map reports empty.
    Assert.AreEqual(0, map.Count());
    Assert.True(map.IsEmpty());
    // After one Put it reports exactly one entry.
    map.Put(500000L, 2L);
    Assert.AreEqual(1, map.Count());
    Assert.False(map.IsEmpty());
    // Removing that entry returns the map to empty.
    map.Remove(500000L);
    Assert.AreEqual(0, map.Count());
    Assert.True(map.IsEmpty());
}
/// <summary>
/// Computes information-retrieval statistics (precision, recall, fall-out, nDCG, reach)
/// at rank <paramref name="at"/>. For each sampled user, the user's most-preferred items
/// (those at or above the relevance threshold) are held out as the "relevant" set, a
/// recommender is trained on everything else, and its top-<paramref name="at"/>
/// recommendations are scored against that relevant set.
/// </summary>
/// <param name="recommenderBuilder">builds the recommender under evaluation</param>
/// <param name="dataModelBuilder">optional; builds the per-user training model (null means GenericDataModel)</param>
/// <param name="dataModel">source of all users and preferences</param>
/// <param name="rescorer">optional rescorer applied when recommending; may be null</param>
/// <param name="at">recommendation list length to evaluate at</param>
/// <param name="relevanceThreshold">minimum preference for an item to count as relevant; NaN means compute per user</param>
/// <param name="evaluationPercentage">fraction of users sampled into the evaluation</param>
/// <returns>aggregate IR statistics over all evaluated users</returns>
public IRStatistics Evaluate(IRecommenderBuilder recommenderBuilder,
                             IDataModelBuilder dataModelBuilder,
                             IDataModel dataModel,
                             IDRescorer rescorer,
                             int at,
                             double relevanceThreshold,
                             double evaluationPercentage)
{
    //Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    //Preconditions.checkArgument(dataModel != null, "dataModel is null");
    //Preconditions.checkArgument(at >= 1, "at must be at least 1");
    //Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
    //  "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");
    int numItems = dataModel.GetNumItems();
    IRunningAverage precision = new FullRunningAverage();
    IRunningAverage recall = new FullRunningAverage();
    IRunningAverage fallOut = new FullRunningAverage();
    IRunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;
    var it = dataModel.GetUserIDs();
    while (it.MoveNext())
    {
        long userID = it.Current;
        if (random.nextDouble() >= evaluationPercentage)
        {
            // Skipped
            continue;
        }
        var stopWatch = new System.Diagnostics.Stopwatch();
        stopWatch.Start();
        IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID);
        // List some most-preferred items that would count as (most) "relevant" results
        double theRelevanceThreshold = Double.IsNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
        FastIDSet relevantItemIDs = dataSplitter.GetRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
        int numRelevantItems = relevantItemIDs.Count();
        if (numRelevantItems <= 0)
        {
            // Nothing held out for this user; cannot score them.
            continue;
        }
        // Build a training snapshot that excludes this user's relevant (held-out) items.
        FastByIDMap<IPreferenceArray> trainingUsers = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
        var it2 = dataModel.GetUserIDs();
        while (it2.MoveNext())
        {
            dataSplitter.ProcessOtherUser(userID, relevantItemIDs, trainingUsers, it2.Current, dataModel);
        }
        IDataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
            : dataModelBuilder.BuildDataModel(trainingUsers);
        try
        {
            trainingModel.GetPreferencesFromUser(userID);
        }
        catch (NoSuchUserException nsee)
        {
            continue; // Oops we excluded all prefs for the user -- just move on
        }
        int size = numRelevantItems + trainingModel.GetItemIDsFromUser(userID).Count();
        if (size < 2 * at)
        {
            // Really not enough prefs to meaningfully evaluate this user
            continue;
        }
        IRecommender recommender = recommenderBuilder.BuildRecommender(trainingModel);
        // Count how many recommended items are in the held-out relevant set.
        int intersectionSize = 0;
        var recommendedItems = recommender.Recommend(userID, at, rescorer);
        foreach (IRecommendedItem recommendedItem in recommendedItems)
        {
            if (relevantItemIDs.Contains(recommendedItem.GetItemID()))
            {
                intersectionSize++;
            }
        }
        int numRecommendedItems = recommendedItems.Count;
        // Precision
        if (numRecommendedItems > 0)
        {
            precision.AddDatum((double) intersectionSize / (double) numRecommendedItems);
        }
        // Recall
        recall.AddDatum((double) intersectionSize / (double) numRelevantItems);
        // Fall-out
        if (numRelevantItems < size)
        {
            fallOut.AddDatum((double) (numRecommendedItems - intersectionSize)
                             / (double) (numItems - numRelevantItems));
        }
        // nDCG
        // In computing, assume relevant IDs have relevance 1 and others 0
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int i = 0; i < numRecommendedItems; i++)
        {
            IRecommendedItem item = recommendedItems[i];
            double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
            if (relevantItemIDs.Contains(item.GetItemID()))
            {
                cumulativeGain += discount;
            }
            // otherwise we're multiplying discount by relevance 0 so it doesn't do anything
            // Ideally results would be ordered with all relevant ones first, so this theoretical
            // ideal list starts with number of relevant items equal to the total number of relevant items
            if (i < numRelevantItems)
            {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0)
        {
            nDCG.AddDatum(cumulativeGain / idealizedGain);
        }
        // Reach
        numUsersRecommendedFor++;
        if (numRecommendedItems > 0)
        {
            numUsersWithRecommendations++;
        }
        stopWatch.Stop();
        log.Info("Evaluated with user {} in {}ms", userID, stopWatch.ElapsedMilliseconds);
        log.Info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}",
                 precision.GetAverage(), recall.GetAverage(), fallOut.GetAverage(), nDCG.GetAverage(),
                 (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
    }
    return new IRStatisticsImpl(
        precision.GetAverage(),
        recall.GetAverage(),
        fallOut.GetAverage(),
        nDCG.GetAverage(),
        (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
}
/// <summary>
/// Verifies Get returns null for a missing key and the stored value after Put.
/// </summary>
public void testPutAndGet()
{
    var map = new FastByIDMap<long?>();
    // Missing key yields null.
    Assert.IsNull(map.Get(500000L));
    // Stored value round-trips.
    map.Put(500000L, 2L);
    Assert.AreEqual(2L, (long) map.Get(500000L));
}
/// <summary>
/// Creates an in-memory ID migrator, backing the long-to-string mapping with a
/// FastByIDMap presized for 100 entries.
/// </summary>
public MemoryIDMigrator()
{
    this.longToString = new FastByIDMap<String>(100);
}
/// <summary>
/// Fuzz test: performs one million random get/put/remove operations on a FastByIDMap
/// and checks every observable result against a reference Dictionary. A small key
/// space (0..99) forces frequent overwrites and removals of existing keys.
/// </summary>
public void testVersusHashMap()
{
    var actual = new FastByIDMap<String>();
    IDictionary<long, string> expected = new Dictionary<long, string>(1000000);
    var rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < 1000000; iteration++)
    {
        double roll = rng.nextDouble();
        long key = (long) rng.nextInt(100);
        if (roll < 0.4)
        {
            // Read: both maps must agree (null when the key is absent).
            string expectedValue;
            expected.TryGetValue(key, out expectedValue);
            Assert.AreEqual(expectedValue, actual.Get(key));
        }
        else
        {
            string previous;
            expected.TryGetValue(key, out previous);
            if (roll < 0.7)
            {
                // Write: Put must return the previous value (or null).
                expected[key] = "bang";
                Assert.AreEqual(previous, actual.Put(key, "bang"));
            }
            else
            {
                // Remove: must return the removed value (or null).
                expected.Remove(key);
                Assert.AreEqual(previous, actual.Remove(key));
            }
            // Size bookkeeping must match after every mutation.
            Assert.AreEqual(expected.Count, actual.Count());
            Assert.AreEqual(expected.Count == 0, actual.IsEmpty());
        }
    }
}