/// <summary>
/// Runs the base set-up, then builds a small four-user / four-item data model and
/// the <c>ALSWRFactorizer</c> instance used by the tests.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    var preferencesByUser = new FastByIDMap<IPreferenceArray>();

    // User 1 rated items 1, 2 and 3.
    var ratingsOfUser1 = new List<IPreference>();
    ratingsOfUser1.Add(new GenericPreference(1L, 1L, 5.0f));
    ratingsOfUser1.Add(new GenericPreference(1L, 2L, 5.0f));
    ratingsOfUser1.Add(new GenericPreference(1L, 3L, 2.0f));
    preferencesByUser.Put(1L, new GenericUserPreferenceArray(ratingsOfUser1));

    // User 2 rated items 1, 3 and 4.
    var ratingsOfUser2 = new List<IPreference>();
    ratingsOfUser2.Add(new GenericPreference(2L, 1L, 2.0f));
    ratingsOfUser2.Add(new GenericPreference(2L, 3L, 3.0f));
    ratingsOfUser2.Add(new GenericPreference(2L, 4L, 5.0f));
    preferencesByUser.Put(2L, new GenericUserPreferenceArray(ratingsOfUser2));

    // User 3 rated items 2 and 4.
    var ratingsOfUser3 = new List<IPreference>();
    ratingsOfUser3.Add(new GenericPreference(3L, 2L, 5.0f));
    ratingsOfUser3.Add(new GenericPreference(3L, 4L, 3.0f));
    preferencesByUser.Put(3L, new GenericUserPreferenceArray(ratingsOfUser3));

    // User 4 rated items 1 and 4.
    var ratingsOfUser4 = new List<IPreference>();
    ratingsOfUser4.Add(new GenericPreference(4L, 1L, 3.0f));
    ratingsOfUser4.Add(new GenericPreference(4L, 4L, 5.0f));
    preferencesByUser.Put(4L, new GenericUserPreferenceArray(ratingsOfUser4));

    dataModel = new GenericDataModel(preferencesByUser);
    factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
}
/// <summary>
/// Builds a three-user model, asks a <c>SamplingCandidateItemsStrategy</c> for candidates for user 123
/// and checks that at most one candidate comes back and that item 1 (already rated by user 123) is not among them.
/// </summary>
public void testStrategy()
{
    var ratingsOf123 = new List<IPreference> { new GenericPreference(123L, 1L, 1.0f) };
    var ratingsOf456 = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f)
    };
    var ratingsOf789 = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f)
    };

    IPreferenceArray arrayOf123 = new GenericUserPreferenceArray(ratingsOf123);

    var preferencesByUser = new FastByIDMap<IPreferenceArray>();
    preferencesByUser.Put(123L, arrayOf123);
    preferencesByUser.Put(456L, new GenericUserPreferenceArray(ratingsOf456));
    preferencesByUser.Put(789L, new GenericUserPreferenceArray(ratingsOf789));

    IDataModel model = new GenericDataModel(preferencesByUser);

    ICandidateItemsStrategy sampler =
        new SamplingCandidateItemsStrategy(1, 1, 1, model.GetNumUsers(), model.GetNumItems());

    FastIDSet candidates = sampler.GetCandidateItems(123L, arrayOf123, model);

    Assert.True(candidates.Count() <= 1);
    Assert.False(candidates.Contains(1L));
}
/// <summary>
/// Adds the preferences of <paramref name="otherUserID"/> to <paramref name="trainingUsers"/>.
/// When that user is the one being evaluated, preferences for the relevant (test) items are left out,
/// and the user is not added at all if nothing remains.
/// </summary>
/// <param name="userID">the user currently being evaluated</param>
/// <param name="relevantItemIDs">item IDs to exclude for the evaluated user</param>
/// <param name="trainingUsers">map that receives the training preferences</param>
/// <param name="otherUserID">the user whose preferences are processed</param>
/// <param name="dataModel">source of the preferences</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(otherUserID);

    if (userID != otherUserID)
    {
        // Any other user contributes all of their preferences unchanged.
        trainingUsers.Put(otherUserID, allPrefs);
        return;
    }

    // The evaluated user: keep only preferences for items that are not "relevant".
    var kept = new List<IPreference>(allPrefs.Length());
    foreach (IPreference candidate in allPrefs)
    {
        if (relevantItemIDs.Contains(candidate.GetItemID()))
        {
            continue;
        }
        kept.Add(candidate);
    }

    if (kept.Count != 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(kept));
    }
}
/// <summary>
/// Round-trips a small <c>Factorization</c> through a <c>FilePersistenceStrategy</c>: loading before anything
/// was persisted must yield null, and loading after <c>MaybePersist</c> must yield an equal factorization.
/// </summary>
public void persistAndLoad()
{
    FastByIDMap<int?> userIDMapping = new FastByIDMap<int?>();
    FastByIDMap<int?> itemIDMapping = new FastByIDMap<int?>();

    userIDMapping.Put(123, 0);
    userIDMapping.Put(456, 1);

    itemIDMapping.Put(12, 0);
    itemIDMapping.Put(34, 1);

    double[][] userFeatures = new double[][] { new double[] { 0.1, 0.2, 0.3 }, new double[] { 0.4, 0.5, 0.6 } };
    double[][] itemFeatures = new double[][] { new double[] { 0.7, 0.8, 0.9 }, new double[] { 1.0, 1.1, 1.2 } };

    Factorization original = new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);

    // Use a unique file name per run. A fixed "storage.bin" in the shared temp directory may be left over
    // from an earlier run (or be in use by a parallel one), which would make the initial
    // "nothing persisted yet" assertion below fail for reasons unrelated to the code under test.
    var storage = Path.Combine(Path.GetTempPath(), "storage-" + Path.GetRandomFileName() + ".bin");
    try
    {
        IPersistenceStrategy persistenceStrategy = new FilePersistenceStrategy(storage);

        // Nothing has been written to the file yet.
        Assert.IsNull(persistenceStrategy.Load());

        persistenceStrategy.MaybePersist(original);
        Factorization clone = persistenceStrategy.Load();

        Assert.True(original.Equals(clone));
    }
    finally
    {
        if (File.Exists(storage))
        {
            try
            {
                File.Delete(storage);
            }
            catch
            {
                // Deliberate best-effort clean-up: a failed delete must not mask the test result.
            }
        }
    }
}
/// <summary>
/// Sets the factorization parameters (rank 3, lambda 0.01, 1000 iterations) and fills
/// <c>dataModel</c> with a small four-user / four-item rating set.
/// </summary>
public void setUpToyData()
{
    this.rank = 3;
    this.lambda = 0.01;
    this.numIterations = 1000;

    var preferencesByUser = new FastByIDMap<IPreferenceArray>();

    // User 1 rated items 1, 2 and 3.
    var ratingsOfUser1 = new List<IPreference>();
    ratingsOfUser1.Add(new GenericPreference(1L, 1L, 5.0f));
    ratingsOfUser1.Add(new GenericPreference(1L, 2L, 5.0f));
    ratingsOfUser1.Add(new GenericPreference(1L, 3L, 2.0f));
    preferencesByUser.Put(1L, new GenericUserPreferenceArray(ratingsOfUser1));

    // User 2 rated items 1, 3 and 4.
    var ratingsOfUser2 = new List<IPreference>();
    ratingsOfUser2.Add(new GenericPreference(2L, 1L, 2.0f));
    ratingsOfUser2.Add(new GenericPreference(2L, 3L, 3.0f));
    ratingsOfUser2.Add(new GenericPreference(2L, 4L, 5.0f));
    preferencesByUser.Put(2L, new GenericUserPreferenceArray(ratingsOfUser2));

    // User 3 rated items 2 and 4.
    var ratingsOfUser3 = new List<IPreference>();
    ratingsOfUser3.Add(new GenericPreference(3L, 2L, 5.0f));
    ratingsOfUser3.Add(new GenericPreference(3L, 4L, 3.0f));
    preferencesByUser.Put(3L, new GenericUserPreferenceArray(ratingsOfUser3));

    // User 4 rated items 1 and 4.
    var ratingsOfUser4 = new List<IPreference>();
    ratingsOfUser4.Add(new GenericPreference(4L, 1L, 3.0f));
    ratingsOfUser4.Add(new GenericPreference(4L, 4L, 5.0f));
    preferencesByUser.Put(4L, new GenericUserPreferenceArray(ratingsOfUser4));

    dataModel = new GenericDataModel(preferencesByUser);
}
/// <summary>
/// Verifies that a <c>SamplingCandidateItemsStrategy</c> constructed with limits of 1 yields at most one
/// candidate item for user 123 and never proposes item 1, which that user has already rated.
/// </summary>
public void testStrategy()
{
    // Three users; user 123 only knows item 1.
    var user123Ratings = new List<IPreference> { new GenericPreference(123L, 1L, 1.0f) };
    var user456Ratings = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f)
    };
    var user789Ratings = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f)
    };

    IPreferenceArray user123Array = new GenericUserPreferenceArray(user123Ratings);

    var allUsers = new FastByIDMap<IPreferenceArray>();
    allUsers.Put(123L, user123Array);
    allUsers.Put(456L, new GenericUserPreferenceArray(user456Ratings));
    allUsers.Put(789L, new GenericUserPreferenceArray(user789Ratings));

    IDataModel model = new GenericDataModel(allUsers);

    ICandidateItemsStrategy sampler =
        new SamplingCandidateItemsStrategy(1, 1, 1, model.GetNumUsers(), model.GetNumItems());

    FastIDSet proposed = sampler.GetCandidateItems(123L, user123Array, model);

    Assert.True(proposed.Count() <= 1);
    Assert.False(proposed.Contains(1L));
}
/// <summary>
/// Persists a two-user / two-item <c>Factorization</c> with a <c>FilePersistenceStrategy</c> and checks that
/// <c>Load</c> returns null before persisting and an equal factorization afterwards.
/// </summary>
public void persistAndLoad()
{
    FastByIDMap<int?> userIDMapping = new FastByIDMap<int?>();
    FastByIDMap<int?> itemIDMapping = new FastByIDMap<int?>();

    userIDMapping.Put(123, 0);
    userIDMapping.Put(456, 1);

    itemIDMapping.Put(12, 0);
    itemIDMapping.Put(34, 1);

    double[][] userFeatures = new double[][] { new double[] { 0.1, 0.2, 0.3 }, new double[] { 0.4, 0.5, 0.6 } };
    double[][] itemFeatures = new double[][] { new double[] { 0.7, 0.8, 0.9 }, new double[] { 1.0, 1.1, 1.2 } };

    Factorization original = new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);

    // A hard-coded "storage.bin" in the machine-wide temp directory can survive a previous (aborted) run or
    // collide with a concurrent run; the first assertion below would then see stale data. A random
    // component in the name keeps every run isolated.
    var storage = Path.Combine(Path.GetTempPath(), "storage-" + Path.GetRandomFileName() + ".bin");
    try
    {
        IPersistenceStrategy persistenceStrategy = new FilePersistenceStrategy(storage);

        // Before anything is persisted there is nothing to load.
        Assert.IsNull(persistenceStrategy.Load());

        persistenceStrategy.MaybePersist(original);
        Factorization clone = persistenceStrategy.Load();

        Assert.True(original.Equals(clone));
    }
    finally
    {
        if (File.Exists(storage))
        {
            try
            {
                File.Delete(storage);
            }
            catch
            {
                // Clean-up is best-effort on purpose so it cannot hide the actual test outcome.
            }
        }
    }
}
/// <summary>
/// Inserts two entries into a map constructed with arguments (1, 1) and expects the first entry
/// to be gone while the second one is still retrievable.
/// </summary>
public void testGrow()
{
    var tinyMap = new FastByIDMap<String>(1, 1);

    tinyMap.Put(500000L, "alpha");
    tinyMap.Put(47L, "bang");

    // The earlier key must no longer be present; the later one must be.
    Assert.IsNull(tinyMap.Get(500000L));
    Assert.AreEqual("bang", tinyMap.Get(47L));
}
/// <summary>
/// Checks size bookkeeping: adding new keys increases the count, overwriting an existing key does not,
/// an absent key reads as null, and an overwritten key returns the new value.
/// </summary>
public void testMaxSize()
{
    var entries = new FastByIDMap<String>();

    entries.Put(4, "bang");
    Assert.AreEqual(1, entries.Count());

    entries.Put(47L, "bang");
    Assert.AreEqual(2, entries.Count());

    // A key that was never inserted.
    Assert.IsNull(entries.Get(500000L));

    // Overwriting key 47 keeps the count at two.
    entries.Put(47L, "buzz");
    Assert.AreEqual(2, entries.Count());
    Assert.AreEqual("buzz", entries.Get(47L));
}
/// <summary>
/// With one delegate user and one anonymous user holding temporary preferences,
/// <c>GetUserIDs</c> is expected to enumerate only the delegate user's ID.
/// </summary>
public void testGetUserIDs()
{
    long delegateUserId = 1;
    long delegateItemId = 11;

    IPreferenceArray delegatePrefs = new GenericUserPreferenceArray(1);
    delegatePrefs.SetUserID(0, delegateUserId);
    delegatePrefs.SetItemID(0, delegateItemId);

    var prefsByDelegateUser = new FastByIDMap<IPreferenceArray>();
    prefsByDelegateUser.Put(delegateUserId, delegatePrefs);

    PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, prefsByDelegateUser);

    // Give one anonymous user a temporary preference.
    long anonymousId = model.TakeAvailableUser().Value;
    IPreferenceArray anonymousPrefs = new GenericUserPreferenceArray(1);
    anonymousPrefs.SetUserID(0, anonymousId);
    anonymousPrefs.SetItemID(0, 22);
    model.SetTempPrefs(anonymousPrefs, anonymousId);

    var ids = model.GetUserIDs();
    ids.MoveNext();

    Assert.AreEqual(delegateUserId, ids.Current);
    Assert.False(ids.MoveNext());
}
/// <summary>
/// Generates a synthetic rating set: multiplies two random matrices (2000 x rank and rank x 1000),
/// passes the product to <c>normalize</c>, and keeps each cell as a preference when a random draw is
/// at most the sparsity of 0.5. Also sets rank, lambda and the iteration count used by the tests.
/// </summary>
public void setUpSyntheticData()
{
    int userCount = 2000;
    int itemCount = 1000;
    double sparsity = 0.5;

    this.rank = 20;
    this.lambda = 0.000000001;
    this.numIterations = 100;

    var userFactors = randomMatrix(userCount, rank, 1);
    var itemFactors = randomMatrix(rank, itemCount, 1);
    var ratings = times(userFactors, itemFactors);
    normalize(ratings, 5);

    var preferencesByUser = new FastByIDMap<IPreferenceArray>();
    for (int u = 0; u < userCount; u++)
    {
        var sampledRow = new List<IPreference>();
        for (int it = 0; it < itemCount; it++)
        {
            // One random draw per cell decides whether the rating is kept.
            if (random.nextDouble() > sparsity)
            {
                continue;
            }
            sampledRow.Add(new GenericPreference(u, it, (float)ratings[u, it]));
        }

        preferencesByUser.Put(u, new GenericUserPreferenceArray(sampledRow));
    }

    dataModel = new GenericDataModel(preferencesByUser);
}
/// <summary>
/// Builds a <see cref="GenericDataModel"/> from all book reviews returned by the repository:
/// one preference array per user, one preference (user, book, rating) per review.
/// </summary>
/// <returns>a data model keyed by user ID</returns>
public GenericDataModel GetUserBasedDataModel()
{
    FastByIDMap<IPreferenceArray> data = new FastByIDMap<IPreferenceArray>();

    IEnumerable<UserBookReview> allBookReviews = _userBookReviewRepository.GetListOf();

    // Group the reviews by user in a single pass. Previously the full review sequence was filtered again
    // for every user, which is quadratic in the worst case and re-enumerates the source each time.
    // GroupBy yields groups in order of first key appearance and keeps element order inside a group,
    // so the resulting model is the same.
    foreach (var reviewsOfUser in allBookReviews.GroupBy(b => b.UserId))
    {
        int userId = (int)reviewsOfUser.Key;

        List<IPreference> listOfPreferences = new List<IPreference>();
        foreach (UserBookReview review in reviewsOfUser)
        {
            int rating = review.Rating;
            int bookId = review.BookId;

            // Arguments are: user ID, item ID, preference value.
            listOfPreferences.Add(new GenericPreference(userId, bookId, rating));
        }

        data.Put(userId, new GenericUserPreferenceArray(listOfPreferences));
    }

    return new GenericDataModel(data);
}
/// <summary>
/// Converts a flat list of user/item records into a <c>GenericDataModel</c>.
/// </summary>
/// <param name="userItems">the records to convert</param>
/// <param name="isReviewBased">
/// when true each record becomes a <c>GenericPreference</c> carrying the record's rating and users get
/// <c>GenericUserPreferenceArray</c>s; otherwise <c>BooleanPreference</c>s and
/// <c>BooleanUserPreferenceArray</c>s are used
/// </param>
/// <returns>the data model built from the grouped preferences</returns>
internal static IDataModel BuildModel(IList<UserItem> userItems, bool isReviewBased)
{
    // Step 1: bucket the preferences by user.
    var prefsByUser = new FastByIDMap<IList<IPreference>>();
    foreach (var item in userItems)
    {
        var bucket = prefsByUser.Get(item.UserId);
        if (bucket == null)
        {
            bucket = new List<IPreference>(3);
            prefsByUser.Put(item.UserId, bucket);
        }

        IPreference pref;
        if (isReviewBased)
        {
            pref = new GenericPreference(item.UserId, item.ItemId, item.Rating);
        }
        else
        {
            pref = new BooleanPreference(item.UserId, item.ItemId);
        }
        bucket.Add(pref);
    }

    // Step 2: turn every bucket into the matching preference array type.
    var arraysByUser = new FastByIDMap<IPreferenceArray>(prefsByUser.Count());
    foreach (var entry in prefsByUser.EntrySet())
    {
        var prefs = (List<IPreference>)entry.Value;

        IPreferenceArray asArray;
        if (isReviewBased)
        {
            asArray = new GenericUserPreferenceArray(prefs);
        }
        else
        {
            asArray = new BooleanUserPreferenceArray(prefs);
        }
        arraysByUser.Put(entry.Key, asArray);
    }

    return new GenericDataModel(arraysByUser);
}
/// <summary>
/// Fills <c>dataModel</c> with randomly sampled ratings taken from the normalized product of two random
/// matrices (2000 users, 1000 items, rank 20) and sets lambda and the number of iterations.
/// Roughly half of the cells are kept: a cell survives when a random draw is at most 0.5.
/// </summary>
public void setUpSyntheticData()
{
    int totalUsers = 2000;
    int totalItems = 1000;
    double keepProbability = 0.5;

    this.rank = 20;
    this.lambda = 0.000000001;
    this.numIterations = 100;

    var users = randomMatrix(totalUsers, rank, 1);
    var items = randomMatrix(rank, totalItems, 1);
    var product = times(users, items);
    normalize(product, 5);

    var byUser = new FastByIDMap<IPreferenceArray>();
    for (int userPos = 0; userPos < totalUsers; userPos++)
    {
        var keptRatings = new List<IPreference>();
        for (int itemPos = 0; itemPos < totalItems; itemPos++)
        {
            // Exactly one random draw per cell, as the sampling decision.
            bool keep = random.nextDouble() <= keepProbability;
            if (keep)
            {
                float rating = (float)product[userPos, itemPos];
                keptRatings.Add(new GenericPreference(userPos, itemPos, rating));
            }
        }

        byUser.Put(userPos, new GenericUserPreferenceArray(keptRatings));
    }

    dataModel = new GenericDataModel(byUser);
}
/// <summary>
/// After <c>Clear</c> the map must report zero entries, be empty, and no longer return the removed value.
/// </summary>
public void testClear()
{
    var numbers = new FastByIDMap<long?>();
    numbers.Put(500000L, 2L);

    numbers.Clear();

    Assert.AreEqual(0, numbers.Count());
    Assert.True(numbers.IsEmpty());
    Assert.IsNull(numbers.Get(500000L));
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences). This
/// <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
/// </summary>
/// <remarks>
/// The constructor keeps a reference to <paramref name="userData"/>, sorts every user's preferences by item,
/// builds the per-item preference index (each sorted by user), records the minimum and maximum preference
/// value seen, and stores sorted arrays of all item IDs and user IDs.
/// </remarks>
/// <param name="userData">users to include, mapped to their preferences (see also <c>ToDataMap</c>); must not be null</param>
/// <param name="timestamps">optionally provided timestamps of preferences: user IDs are mapped to maps of item IDs to timestamps; stored as given</param>
/// <exception cref="ArgumentNullException">if <paramref name="userData"/> is null</exception>
public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    // Restores the precondition that was only present as a commented-out check; without it a null
    // argument surfaced later as a NullReferenceException with no hint about the cause.
    if (userData == null)
    {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    FastByIDMap<IList<IPreference>> prefsForItems = new FastByIDMap<IList<IPreference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int currentCount = 0;
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        IPreferenceArray prefs = entry.Value;
        prefs.SortByItem();

        foreach (IPreference preference in prefs)
        {
            long itemID = preference.GetItemID();
            itemIDSet.Add(itemID);

            // Collect this preference under its item, creating the per-item list on first use.
            var prefsForItem = prefsForItems.Get(itemID);
            if (prefsForItem == null)
            {
                prefsForItem = new List<IPreference>(2);
                prefsForItems.Put(itemID, prefsForItem);
            }
            prefsForItem.Add(preference);

            // Track the overall value range.
            float value = preference.GetValue();
            if (value > maxPrefValue)
            {
                maxPrefValue = value;
            }
            if (value < minPrefValue)
            {
                minPrefValue = value;
            }
        }

        // Progress log every 10000 users.
        if (++currentCount % 10000 == 0)
        {
            log.Info("Processed {0} users", currentCount);
        }
    }
    log.Info("Processed {0} users", currentCount);

    setMinPreference(minPrefValue);
    setMaxPreference(maxPrefValue);

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.preferenceForItems = ToDataMap(prefsForItems, false);

    foreach (var entry in preferenceForItems.EntrySet())
    {
        entry.Value.SortByUser();
    }

    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (var v in userData.Keys)
    {
        userIDs[i++] = v;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Assigns consecutive zero-based indexes to the IDs produced by <paramref name="idIterator"/>,
/// in enumeration order.
/// </summary>
/// <param name="size">size argument passed to the new map's constructor</param>
/// <param name="idIterator">enumerator over the IDs to index; it is advanced to its end</param>
/// <returns>a map from ID to its assigned index</returns>
private static FastByIDMap<int?> createIDMapping(int size, IEnumerator<long> idIterator)
{
    var indexById = new FastByIDMap<int?>(size);

    for (int nextIndex = 0; idIterator.MoveNext(); nextIndex++)
    {
        indexById.Put(idIterator.Current, nextIndex);
    }

    return indexById;
}
/// <summary>
/// Returns the index mapped to <paramref name="itemID"/>. An unknown item is registered first,
/// using the current number of entries in <c>itemIDMapping</c> as its index.
/// </summary>
/// <param name="itemID">the item to look up</param>
/// <returns>the existing or newly assigned index</returns>
protected int itemIndex(long itemID)
{
    int? known = itemIDMapping.Get(itemID);
    if (known != null)
    {
        return known.Value;
    }

    // First time this item is seen: the next free index equals the current mapping size.
    int? assigned = itemIDMapping.Count();
    itemIDMapping.Put(itemID, assigned);
    return assigned.Value;
}
/// <summary>
/// Returns the index mapped to <paramref name="userID"/>. An unknown user is registered first,
/// using the current number of entries in <c>userIDMapping</c> as its index.
/// </summary>
/// <param name="userID">the user to look up</param>
/// <returns>the existing or newly assigned index</returns>
protected int userIndex(long userID)
{
    int? known = userIDMapping.Get(userID);
    if (known != null)
    {
        return known.Value;
    }

    // First time this user is seen: the next free index equals the current mapping size.
    int? assigned = userIDMapping.Count();
    userIDMapping.Put(userID, assigned);
    return assigned.Value;
}
/// <summary>
/// Adds <paramref name="value"/> to the running average stored for <paramref name="itemID"/>,
/// creating and registering a <c>FullRunningAverage</c> when the item has none yet.
/// </summary>
/// <param name="itemID">the item whose average is updated</param>
/// <param name="value">the datum to add</param>
/// <param name="averages">per-item running averages; may gain a new entry</param>
private static void addDatumAndCreateIfNeeded(long itemID, float value, FastByIDMap<IRunningAverage> averages)
{
    IRunningAverage average = averages.Get(itemID);

    bool isNewItem = average == null;
    if (isNewItem)
    {
        average = new FullRunningAverage();
        averages.Put(itemID, average);
    }

    average.AddDatum(value);
}
/// <summary>Exports the user IDs and preferences held by a data model.</summary>
/// <param name="dataModel">the model to export from</param>
/// <returns>
/// a <c>FastByIDMap</c> mapping each user ID to the <see cref="IPreferenceArray"/> the model returns
/// for that user
/// </returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(IDataModel dataModel)
{
    var exported = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());

    for (var users = dataModel.GetUserIDs(); users.MoveNext();)
    {
        long currentUser = users.Current;
        IPreferenceArray prefsOfUser = dataModel.GetPreferencesFromUser(currentUser);
        exported.Put(currentUser, prefsOfUser);
    }

    return exported;
}
/// <summary>Exports the user IDs and associated item IDs held by a data model.</summary>
/// <param name="dataModel">the model to export from</param>
/// <returns>
/// a <c>FastByIDMap</c> mapping each user ID to the <c>FastIDSet</c> of item IDs the model returns
/// for that user
/// </returns>
public static FastByIDMap<FastIDSet> toDataMap(IDataModel dataModel)
{
    var exported = new FastByIDMap<FastIDSet>(dataModel.GetNumUsers());

    for (var users = dataModel.GetUserIDs(); users.MoveNext();)
    {
        long currentUser = users.Current;
        FastIDSet itemsOfUser = dataModel.GetItemIDsFromUser(currentUser);
        exported.Put(currentUser, itemsOfUser);
    }

    return exported;
}
/// <summary>
/// Builds a new map in which every preference list of <paramref name="data"/> is wrapped in a
/// preference array.
/// </summary>
/// <param name="data">preference lists keyed by ID</param>
/// <param name="byUser">
/// true to create <c>GenericUserPreferenceArray</c>s, false to create <c>GenericItemPreferenceArray</c>s
/// </param>
/// <returns>a newly created map with the same keys as <paramref name="data"/></returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data, bool byUser)
{
    var converted = new FastByIDMap<IPreferenceArray>(data.Count());

    foreach (var pair in data.EntrySet())
    {
        var prefs = pair.Value;

        IPreferenceArray asArray;
        if (byUser)
        {
            asArray = new GenericUserPreferenceArray(prefs);
        }
        else
        {
            asArray = new GenericItemPreferenceArray(prefs);
        }

        converted.Put(pair.Key, asArray);
    }

    return converted;
}
/// <summary>
/// Parses one input line of the form "userID,itemID[,preference[,timestamp]]" and applies it to
/// <paramref name="data"/> and <paramref name="timestamps"/>. Blank lines and lines starting with
/// <c>COMMENT_CHAR</c> are ignored. A line with an empty third token and no fourth token removes the
/// user/item association; any other line adds it.
/// </summary>
/// <param name="line">the raw line</param>
/// <param name="data">item ID sets keyed by user ID; updated in place</param>
/// <param name="timestamps">timestamps keyed by user ID and item ID; updated through the timestamp helpers</param>
protected void processLineWithoutID(String line, FastByIDMap<FastIDSet> data, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    bool skip = String.IsNullOrWhiteSpace(line) || line[0] == COMMENT_CHAR;
    if (skip)
    {
        return;
    }

    var parts = SplitLine(line);
    string rawUser = parts[0];
    string rawItem = parts[1];

    bool hasPreference = parts.Length > 2;
    string rawPreference = "";
    if (hasPreference)
    {
        rawPreference = parts[2];
    }

    bool hasTimestamp = parts.Length > 3;
    string rawTimestamp = null;
    if (hasTimestamp)
    {
        rawTimestamp = parts[3];
    }

    long userID = readUserIDFromString(rawUser);
    long itemID = readItemIDFromString(rawItem);

    if (transpose)
    {
        // Swap the roles of user and item.
        long swapped = userID;
        userID = itemID;
        itemID = swapped;
    }

    // A line of the form "userID,itemID," means: remove.
    bool isRemoval = hasPreference && !hasTimestamp && String.IsNullOrEmpty(rawPreference);

    FastIDSet itemsOfUser = data.Get(userID);

    if (isRemoval)
    {
        if (itemsOfUser != null)
        {
            itemsOfUser.Remove(itemID);
        }
        removeTimestamp(userID, itemID, timestamps);
        return;
    }

    if (itemsOfUser == null)
    {
        itemsOfUser = new FastIDSet(2);
        data.Put(userID, itemsOfUser);
    }
    itemsOfUser.Add(itemID);
    addTimestamp(userID, itemID, rawTimestamp, timestamps);
}
/// <summary>
/// With a single delegate user and no anonymous users taken, <c>GetNumUsers</c> must report 1.
/// </summary>
public void testGetNumUsersWithDelegateUsersOnly()
{
    long delegateUserId = 1;
    long delegateItemId = 11;

    IPreferenceArray delegatePrefs = new GenericUserPreferenceArray(1);
    delegatePrefs.SetUserID(0, delegateUserId);
    delegatePrefs.SetItemID(0, delegateItemId);

    var prefsByDelegateUser = new FastByIDMap<IPreferenceArray>();
    prefsByDelegateUser.Put(delegateUserId, delegatePrefs);

    PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, prefsByDelegateUser);

    Assert.AreEqual(1, model.GetNumUsers());
}
/// <summary>
/// Copies one user's preferences into the training set. For the user under evaluation the preferences
/// on relevant (test) items are filtered out first, and nothing is stored when the filtered list is empty;
/// every other user's preference array is stored as returned by the data model.
/// </summary>
/// <param name="userID">the user under evaluation</param>
/// <param name="relevantItemIDs">items withheld from the evaluated user's training data</param>
/// <param name="trainingUsers">destination map of training preferences</param>
/// <param name="otherUserID">the user being processed</param>
/// <param name="dataModel">model the preferences are read from</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray sourcePrefs = dataModel.GetPreferencesFromUser(otherUserID);

    bool isEvaluatedUser = userID == otherUserID;
    if (!isEvaluatedUser)
    {
        // Not the evaluated user: take everything.
        trainingUsers.Put(otherUserID, sourcePrefs);
        return;
    }

    // Evaluated user: drop the preferences on the relevant items.
    var trainingPrefs = new List<IPreference>(sourcePrefs.Length());
    foreach (IPreference current in sourcePrefs)
    {
        bool isRelevant = relevantItemIDs.Contains(current.GetItemID());
        if (!isRelevant)
        {
            trainingPrefs.Add(current);
        }
    }

    if (trainingPrefs.Count > 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(trainingPrefs));
    }
}
/// <summary>
/// Builds a <c>GenericBooleanPrefDataModel</c> from a boolean matrix: row <c>i</c> belongs to
/// <c>userIDs[i]</c> and a true cell at column <c>j</c> associates that user with item ID <c>j</c>.
/// Users without any true cell are omitted.
/// </summary>
/// <param name="userIDs">user ID of each row</param>
/// <param name="prefs">one boolean row per user</param>
/// <returns>the resulting data model</returns>
public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
{
    var itemsByUser = new FastByIDMap<FastIDSet>();

    for (int row = 0; row < userIDs.Length; row++)
    {
        var likedItems = new FastIDSet();
        for (int column = 0; column < prefs[row].Length; column++)
        {
            if (!prefs[row][column])
            {
                continue;
            }
            likedItems.Add(column);
        }

        if (likedItems.IsEmpty())
        {
            continue;
        }
        itemsByUser.Put(userIDs[row], likedItems);
    }

    return new GenericBooleanPrefDataModel(itemsByUser);
}
/// <summary>
/// Builds a <c>GenericDataModel</c> from a matrix of optional ratings: row <c>i</c> belongs to
/// <c>userIDs[i]</c> and a non-null cell at column <c>j</c> becomes a preference for item ID <c>j</c>
/// (converted to float). Users without any rating are omitted.
/// </summary>
/// <param name="userIDs">user ID of each row</param>
/// <param name="prefValues">one row of optional ratings per user</param>
/// <returns>the resulting data model</returns>
public static IDataModel getDataModel(long[] userIDs, double?[][] prefValues)
{
    var prefsByUser = new FastByIDMap<IPreferenceArray>();

    for (int row = 0; row < userIDs.Length; row++)
    {
        var ratings = new List<IPreference>();
        for (int column = 0; column < prefValues[row].Length; column++)
        {
            double? cell = prefValues[row][column];
            if (!cell.HasValue)
            {
                continue;
            }
            ratings.Add(new GenericPreference(userIDs[row], column, (float)cell.Value));
        }

        if (ratings.Count == 0)
        {
            continue;
        }
        prefsByUser.Put(userIDs[row], new GenericUserPreferenceArray(ratings));
    }

    return new GenericDataModel(prefsByUser);
}
/// <summary>
/// Parses <paramref name="timestampString"/> and records the result for the given user and item.
/// Does nothing when the string is null.
/// </summary>
/// <param name="userID">user the timestamp belongs to</param>
/// <param name="itemID">item the timestamp belongs to</param>
/// <param name="timestampString">raw timestamp text, or null for "no timestamp"</param>
/// <param name="timestamps">timestamps keyed by user ID, then item ID; updated in place</param>
private void addTimestamp(long userID, long itemID, string timestampString, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    if (timestampString == null)
    {
        return;
    }

    // Create the per-user map on first use.
    FastByIDMap<DateTime?> timestampsOfUser = timestamps.Get(userID);
    if (timestampsOfUser == null)
    {
        timestampsOfUser = new FastByIDMap<DateTime?>();
        timestamps.Put(userID, timestampsOfUser);
    }

    var parsed = readTimestampFromString(timestampString);
    timestampsOfUser.Put(itemID, parsed);
}
/// <summary>
/// Reads a <c>Factorization</c> from a binary stream. The layout consumed is: feature count, user count,
/// item count (all Int32), then per user an Int32 index, an Int64 ID and one double per feature,
/// then the same record shape per item.
/// </summary>
/// <param name="inFile">stream positioned at the start of the serialized factorization; it is not closed here</param>
/// <returns>the factorization reconstructed from the stream</returns>
public static Factorization readBinary(Stream inFile)
{
    var reader = new BinaryReader(inFile);

    // Header.
    int featureCount = reader.ReadInt32();
    int userCount = reader.ReadInt32();
    int itemCount = reader.ReadInt32();

    // User section.
    var userIDMapping = new FastByIDMap<int?>(userCount);
    var userFeatures = new double[userCount][];
    for (int record = 0; record < userCount; record++)
    {
        int position = reader.ReadInt32();
        long id = reader.ReadInt64();

        userFeatures[position] = new double[featureCount];
        userIDMapping.Put(id, position);

        double[] vector = userFeatures[position];
        for (int f = 0; f < featureCount; f++)
        {
            vector[f] = reader.ReadDouble();
        }
    }

    // Item section.
    var itemIDMapping = new FastByIDMap<int?>(itemCount);
    var itemFeatures = new double[itemCount][];
    for (int record = 0; record < itemCount; record++)
    {
        int position = reader.ReadInt32();
        long id = reader.ReadInt64();

        itemFeatures[position] = new double[featureCount];
        itemIDMapping.Put(id, position);

        double[] vector = itemFeatures[position];
        for (int f = 0; f < featureCount; f++)
        {
            vector[f] = reader.ReadDouble();
        }
    }

    return new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);
}
/// <summary>
/// Reduces preference arrays to plain item ID sets: for every entry of <paramref name="data"/> the
/// item IDs of all its preferences are collected into a <c>FastIDSet</c>.
/// </summary>
/// <param name="data">preference arrays keyed by ID</param>
/// <returns>a new map with the same keys, each mapped to the set of item IDs</returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data)
{
    var itemSets = new FastByIDMap<FastIDSet>(data.Count());

    foreach (var pair in data.EntrySet())
    {
        IPreferenceArray prefs = pair.Value;
        int count = prefs.Length();

        var ids = new FastIDSet(count);
        int position = 0;
        while (position < count)
        {
            ids.Add(prefs.GetItemID(position));
            position++;
        }

        itemSets.Put(pair.Key, ids);
    }

    return itemSets;
}
/// <summary>
/// For a regular (delegate) user, <c>GetPreferencesFromUser</c> must return the preferences that were
/// registered with the delegate.
/// </summary>
public void testGetPreferencesForNonAnonymousUser()
{
    long delegateUserId = 1;
    long delegateItemId = 11;

    IPreferenceArray delegatePrefs = new GenericUserPreferenceArray(1);
    delegatePrefs.SetUserID(0, delegateUserId);
    delegatePrefs.SetItemID(0, delegateItemId);

    var prefsByDelegateUser = new FastByIDMap<IPreferenceArray>();
    prefsByDelegateUser.Put(delegateUserId, delegatePrefs);

    PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, prefsByDelegateUser);

    Assert.AreEqual(delegatePrefs, model.GetPreferencesFromUser(delegateUserId));
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users and the items they are
/// associated with. This <see cref="IDataModel"/> retains all this information in memory.
/// </summary>
/// <remarks>
/// The constructor keeps a reference to <paramref name="userData"/>, builds the reverse index from item ID
/// to the set of user IDs, and stores sorted arrays of all item IDs and user IDs.
/// </remarks>
/// <param name="userData">users to include, mapped to the set of their item IDs; must not be null</param>
/// <param name="timestamps">optionally provided timestamps of preferences: user IDs are mapped to maps of item IDs to timestamps; stored as given</param>
/// <exception cref="ArgumentNullException">if <paramref name="userData"/> is null</exception>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    // Restores the precondition that was only present as a commented-out check; without it a null
    // argument surfaced later as a NullReferenceException with no hint about the cause.
    if (userData == null)
    {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);

        // Register this user under every item they are associated with.
        var it = itemIDs1.GetEnumerator();
        while (it.MoveNext())
        {
            long itemID = it.Current;
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.userIDs = new long[userData.Count()];
    int i = 0;
    var it1 = userData.Keys.GetEnumerator();
    while (it1.MoveNext())
    {
        userIDs[i++] = it1.Current;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Combines one delegate user (items 11 and 22) with one anonymous user (items 11 and 33) and checks the
/// per-item preference lookups and the user counts for single items and item pairs.
/// </summary>
public void testGetPreferencesForItem()
{
    long delegateUserId = 4;
    long sharedItemId = 11;
    long delegateOnlyItemId = 22;
    long anonymousOnlyItemId = 33;

    // Delegate user: shared item + delegate-only item.
    IPreferenceArray delegatePrefs = new GenericUserPreferenceArray(2);
    delegatePrefs.SetUserID(0, delegateUserId);
    delegatePrefs.SetItemID(0, sharedItemId);
    delegatePrefs.SetUserID(1, delegateUserId);
    delegatePrefs.SetItemID(1, delegateOnlyItemId);

    var prefsByDelegateUser = new FastByIDMap<IPreferenceArray>();
    prefsByDelegateUser.Put(delegateUserId, delegatePrefs);

    PlusAnonymousConcurrentUserDataModel model = getTestableWithDelegateData(10, prefsByDelegateUser);

    // Anonymous user: shared item + anonymous-only item.
    long anonymousId = model.TakeAvailableUser().Value;
    IPreferenceArray anonymousPrefs = new GenericUserPreferenceArray(2);
    anonymousPrefs.SetUserID(0, anonymousId);
    anonymousPrefs.SetItemID(0, sharedItemId);
    anonymousPrefs.SetUserID(1, anonymousId);
    anonymousPrefs.SetItemID(1, anonymousOnlyItemId);
    model.SetTempPrefs(anonymousPrefs, anonymousId);

    Assert.AreEqual(delegateUserId, model.GetPreferencesForItem(sharedItemId).Get(0).GetUserID());
    Assert.AreEqual(2, model.GetPreferencesForItem(sharedItemId).Length());
    Assert.AreEqual(1, model.GetPreferencesForItem(delegateOnlyItemId).Length());
    Assert.AreEqual(1, model.GetPreferencesForItem(anonymousOnlyItemId).Length());

    Assert.AreEqual(2, model.GetNumUsersWithPreferenceFor(sharedItemId));
    Assert.AreEqual(1, model.GetNumUsersWithPreferenceFor(sharedItemId, delegateOnlyItemId));
    Assert.AreEqual(1, model.GetNumUsersWithPreferenceFor(sharedItemId, anonymousOnlyItemId));
}
/// <summary>
/// Executes <c>SelectCmd</c> and builds a <c>GenericDataModel</c> from the returned rows. Each row
/// supplies a user ID and an item ID; when <c>PrefValFld</c> is set the row also supplies a preference
/// value and <c>GenericPreference</c>s are created, otherwise <c>BooleanPreference</c>s are used.
/// </summary>
/// <returns>the data model built from the query result</returns>
public IDataModel Load()
{
    bool hasPrefVal = !String.IsNullOrEmpty(PrefValFld);

    // Step 1: read all rows and bucket the preferences by user.
    var prefsByUser = new FastByIDMap<IList<IPreference>>();
    using (var reader = SelectCmd.ExecuteReader())
    {
        while (reader.Read())
        {
            long userID = Convert.ToInt64(reader[UserIdFld]);
            long itemID = Convert.ToInt64(reader[ItemIdFld]);

            var bucket = prefsByUser.Get(userID);
            if (bucket == null)
            {
                bucket = new List<IPreference>(3);
                prefsByUser.Put(userID, bucket);
            }

            IPreference pref;
            if (hasPrefVal)
            {
                var prefVal = Convert.ToSingle(reader[PrefValFld]);
                pref = new GenericPreference(userID, itemID, prefVal);
            }
            else
            {
                pref = new BooleanPreference(userID, itemID);
            }
            bucket.Add(pref);
        }
    }

    // Step 2: wrap every bucket in the matching preference array type.
    var arraysByUser = new FastByIDMap<IPreferenceArray>(prefsByUser.Count());
    foreach (var pair in prefsByUser.EntrySet())
    {
        var prefs = (List<IPreference>)pair.Value;

        IPreferenceArray asArray;
        if (hasPrefVal)
        {
            asArray = new GenericUserPreferenceArray(prefs);
        }
        else
        {
            asArray = new BooleanUserPreferenceArray(prefs);
        }
        arraysByUser.Put(pair.Key, asArray);
    }

    return new GenericDataModel(arraysByUser);
}
/// <summary>
/// Shuffles the synthetic data set and verifies that the shuffler exposes every preference of the data
/// model exactly once: each shuffled preference matches the model's value and is seen for the first time,
/// every model preference was seen, and the totals agree.
/// </summary>
public void testPreferenceShufflerWithSyntheticData()
{
    setUpSyntheticData();

    ParallelSGDFactorizer.PreferenceShuffler shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
    shuffler.shuffle();
    shuffler.stage();

    // user ID -> (item ID -> seen flag)
    var seen = new FastByIDMap<FastByIDMap<bool?>>();

    for (int position = 0; position < shuffler.size(); position++)
    {
        IPreference shuffled = shuffler.get(position);
        long shuffledUser = shuffled.GetUserID();
        long shuffledItem = shuffled.GetItemID();

        float? modelValue = dataModel.GetPreferenceValue(shuffledUser, shuffledItem);
        Assert.AreEqual(shuffled.GetValue(), modelValue.Value, 0.0);

        if (!seen.ContainsKey(shuffledUser))
        {
            seen.Put(shuffledUser, new FastByIDMap<bool?>());
        }

        // Must not have been encountered before.
        Assert.IsNull(seen.Get(shuffledUser).Get(shuffledItem));
        seen.Get(shuffledUser).Put(shuffledItem, true);
    }

    int total = 0;
    for (var users = dataModel.GetUserIDs(); users.MoveNext();)
    {
        long currentUser = users.Current;
        IPreferenceArray prefsOfUser = dataModel.GetPreferencesFromUser(currentUser);
        foreach (IPreference modelPref in prefsOfUser)
        {
            Assert.True(seen.Get(modelPref.GetUserID()).Get(modelPref.GetItemID()).Value);
            total++;
        }
    }

    Assert.AreEqual(total, shuffler.size());
}
/// <summary>
/// Creates a <c>GenericDataModel</c> from parallel arrays: <c>prefValues[i]</c> holds the optional ratings
/// of <c>userIDs[i]</c>, the column index serves as item ID, and null cells are skipped.
/// A user with no ratings at all is not added to the model.
/// </summary>
/// <param name="userIDs">the user IDs, one per row of <paramref name="prefValues"/></param>
/// <param name="prefValues">optional ratings per user and item index</param>
/// <returns>the data model</returns>
public static IDataModel getDataModel(long[] userIDs, double?[][] prefValues)
{
    var model = new FastByIDMap<IPreferenceArray>();

    int userPos = 0;
    while (userPos < userIDs.Length)
    {
        long userId = userIDs[userPos];
        double?[] ratingsRow = prefValues[userPos];

        var collected = new List<IPreference>();
        for (int itemPos = 0; itemPos < ratingsRow.Length; itemPos++)
        {
            if (ratingsRow[itemPos].HasValue)
            {
                float rating = (float)ratingsRow[itemPos].Value;
                collected.Add(new GenericPreference(userId, itemPos, rating));
            }
        }

        if (collected.Count > 0)
        {
            model.Put(userId, new GenericUserPreferenceArray(collected));
        }

        userPos++;
    }

    return new GenericDataModel(model);
}
/// <summary>
/// Creates a <c>GenericBooleanPrefDataModel</c> from parallel arrays: <c>prefs[i]</c> holds the flags of
/// <c>userIDs[i]</c>, and every true flag adds its column index as an item ID.
/// A user with no true flag is not added to the model.
/// </summary>
/// <param name="userIDs">the user IDs, one per row of <paramref name="prefs"/></param>
/// <param name="prefs">association flags per user and item index</param>
/// <returns>the data model</returns>
public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
{
    var model = new FastByIDMap<FastIDSet>();

    int userPos = 0;
    while (userPos < userIDs.Length)
    {
        bool[] flags = prefs[userPos];

        var items = new FastIDSet();
        for (int itemPos = 0; itemPos < flags.Length; itemPos++)
        {
            if (flags[itemPos])
            {
                items.Add(itemPos);
            }
        }

        bool hasAny = !items.IsEmpty();
        if (hasAny)
        {
            model.Put(userIDs[userPos], items);
        }

        userPos++;
    }

    return new GenericBooleanPrefDataModel(model);
}
/// <summary>
/// Returns the recommender data model, building it from all movie ratings on a cache miss and storing it
/// in the memory cache under the key "RecommenderDataModel".
/// </summary>
/// <returns>the cached model, or the newly built (and now cached) one</returns>
private IDataModel GetDataModel()
{
    var cacheKey = "RecommenderDataModel";

    IDataModel dataModel = _memoryCache.Get<IDataModel>(cacheKey);
    if (dataModel != null)
    {
        return dataModel;
    }

    var movieRatings = _unitOfWork.MovieRatingRepository.GetAll();

    // Bucket one boolean preference per rating by user.
    FastByIDMap<IList<IPreference>> data = new FastByIDMap<IList<IPreference>>();
    foreach (var movieRating in movieRatings)
    {
        var userPreferences = data.Get(movieRating.UserId);
        if (userPreferences == null)
        {
            userPreferences = new List<IPreference>(3);
            data.Put(movieRating.UserId, userPreferences);
        }
        userPreferences.Add(new BooleanPreference(movieRating.UserId, movieRating.MovieId));
    }

    var newData = new FastByIDMap<IPreferenceArray>(data.Count());
    foreach (var entry in data.EntrySet())
    {
        var prefList = (List<IPreference>)entry.Value;
        newData.Put(entry.Key, (IPreferenceArray)new BooleanUserPreferenceArray(prefList));
    }

    dataModel = new GenericDataModel(newData);
    _memoryCache.Set(cacheKey, dataModel);

    // Return the instance that was just cached. Building a second GenericDataModel here (as before)
    // repeated the whole construction for nothing and handed the first caller a different object
    // than every later caller gets from the cache.
    return dataModel;
}
/// <summary>
/// Randomly splits one user's preferences into a training and a test part. Each preference is copied and
/// goes to training when a random draw is below <paramref name="trainingPercentage"/>, otherwise to test.
/// The user is recorded only if at least one preference landed in training; test preferences are
/// recorded only in that case as well.
/// </summary>
/// <param name="trainingPercentage">threshold compared against each random draw</param>
/// <param name="trainingPrefs">receives the user's training preferences</param>
/// <param name="testPrefs">receives the user's test preferences</param>
/// <param name="userID">the user whose preferences are split</param>
/// <param name="dataModel">model the preferences are read from</param>
private void splitOneUsersPrefs(double trainingPercentage, FastByIDMap<IPreferenceArray> trainingPrefs, FastByIDMap<IPreferenceArray> testPrefs, long userID, IDataModel dataModel)
{
    // Both buckets are created lazily so that "nothing assigned" stays distinguishable as null.
    List<IPreference> trainingBucket = null;
    List<IPreference> testBucket = null;

    IPreferenceArray source = dataModel.GetPreferencesFromUser(userID);
    int count = source.Length();

    for (int position = 0; position < count; position++)
    {
        IPreference copy = new GenericPreference(userID, source.GetItemID(position), source.GetValue(position));

        bool toTraining = random.nextDouble() < trainingPercentage;
        if (toTraining)
        {
            trainingBucket = trainingBucket ?? new List<IPreference>(3);
            trainingBucket.Add(copy);
        }
        else
        {
            testBucket = testBucket ?? new List<IPreference>(3);
            testBucket.Add(copy);
        }
    }

    if (trainingBucket == null)
    {
        return;
    }

    trainingPrefs.Put(userID, new GenericUserPreferenceArray(trainingBucket));
    if (testBucket != null)
    {
        testPrefs.Put(userID, new GenericUserPreferenceArray(testBucket));
    }
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users and their associated
/// item IDs. This <see cref="IDataModel"/> retains all this information in memory.
/// </summary>
/// <remarks>
/// Besides keeping a reference to <paramref name="userData"/>, the constructor derives the item-to-users
/// index and sorted arrays of all item IDs and all user IDs.
/// </remarks>
/// <param name="userData">users to include, each mapped to the set of their item IDs; must not be null</param>
/// <param name="timestamps">optionally provided timestamps of preferences: user IDs are mapped to maps of item IDs to timestamps; stored as given</param>
/// <exception cref="ArgumentNullException">if <paramref name="userData"/> is null</exception>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    // The original precondition existed only as a commented-out line; enforce it so that a null
    // argument fails immediately with a descriptive exception instead of a NullReferenceException.
    if (userData == null)
    {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);

        // Add this user to the user set of each of their items.
        var it = itemIDs1.GetEnumerator();
        while (it.MoveNext())
        {
            long itemID = it.Current;
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.userIDs = new long[userData.Count()];
    int i = 0;
    var it1 = userData.Keys.GetEnumerator();
    while (it1.MoveNext())
    {
        userIDs[i++] = it1.Current;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Converts a map of preference lists into a map of preference arrays, first taking one
/// preference out of the list stored under key <paramref name="userId"/>.
/// </summary>
/// <remarks>
/// Side effects: the static <c>preferenceFromUsersRemoved</c> map is replaced on every call, and the
/// list stored under <paramref name="userId"/> is modified in place. The key comparison against
/// <paramref name="userId"/> is made regardless of <paramref name="byUser"/>.
/// </remarks>
/// <param name="data">preference lists keyed by ID</param>
/// <param name="userId">key of the entry to remove a preference from</param>
/// <param name="itemId">item ID of the preference to remove (the first match is used)</param>
/// <param name="byUser">true to wrap lists in <see cref="GenericUserPreferenceArray"/>, false for <see cref="GenericItemPreferenceArray"/></param>
/// <returns>a new map holding one preference array per entry of <paramref name="data"/></returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data,long userId,long itemId, bool byUser) {
    var newData = new FastByIDMap<IPreferenceArray>(data.Count());
    preferenceFromUsersRemoved = new FastByIDMap<IPreference>();
    foreach (var entry in data.EntrySet()) {
        var prefList = entry.Value;
        if (entry.Key == userId) {
            // Look the preference up once and reuse it for both recording and removal.
            // It is null when the list has no preference for itemId, as before.
            var removedPref = prefList.FirstOrDefault(i => i.GetItemID() == itemId);
            preferenceFromUsersRemoved.Put(userId, removedPref);
            prefList.Remove(removedPref);
        }
        newData.Put(entry.Key, byUser ? (IPreferenceArray)new GenericUserPreferenceArray(prefList) : new GenericItemPreferenceArray(prefList));
    }
    return newData;
}
/// <summary>
/// Wraps every preference list in <paramref name="data"/> in a preference array and collects
/// the arrays, under the same keys, in a newly created map.
/// </summary>
/// <param name="data">preference lists keyed by ID</param>
/// <param name="byUser">true to wrap lists in <see cref="GenericUserPreferenceArray"/>, false for <see cref="GenericItemPreferenceArray"/></param>
/// <returns>the new map of preference arrays</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data, bool byUser) {
    var arraysByID = new FastByIDMap<IPreferenceArray>(data.Count());
    foreach (var pair in data.EntrySet()) {
        IPreferenceArray wrapped;
        if (byUser) {
            wrapped = new GenericUserPreferenceArray(pair.Value);
        } else {
            wrapped = new GenericItemPreferenceArray(pair.Value);
        }
        arraysByID.Put(pair.Key, wrapped);
    }
    return arraysByID;
}
/// <summary>Builds a three-entry map fixture: 500000 -> "alpha", 47 -> "bang", 2 -> "beta".</summary>
/// <returns>the populated map</returns>
private static FastByIDMap<String> buildTestFastMap() {
    long[] keys = { 500000L, 47L, 2L };
    String[] values = { "alpha", "bang", "beta" };

    FastByIDMap<String> fixture = new FastByIDMap<String>();
    // Entries are inserted in the order listed above.
    for (int n = 0; n < keys.Length; n++) {
        fixture.Put(keys[n], values[n]);
    }
    return fixture;
}
/// <summary>
/// Runs one million random get/put/remove operations against both a <see cref="FastByIDMap{T}"/>
/// and a <see cref="Dictionary{TKey,TValue}"/> and checks that they stay in agreement.
/// </summary>
public void testVersusHashMap() {
    var actual = new FastByIDMap<String>();
    IDictionary<long, string> expected = new Dictionary<long, string>(1000000);
    var r = RandomUtils.getRandom();

    for (int i = 0; i < 1000000; i++) {
        double d = r.nextDouble();
        long key = (long) r.nextInt(100);

        // Value the reference dictionary holds before this step (null when the key is absent).
        string expectedBefore;
        expected.TryGetValue(key, out expectedBefore);

        if (d < 0.4) {
            // Read-only step: sizes are not re-checked here.
            Assert.AreEqual(expectedBefore, actual.Get(key));
            continue;
        }

        if (d < 0.7) {
            expected[key] = "bang";
            Assert.AreEqual(expectedBefore, actual.Put(key, "bang"));
        } else {
            expected.Remove(key);
            Assert.AreEqual(expectedBefore, actual.Remove(key));
        }

        Assert.AreEqual(expected.Count, actual.Count());
        Assert.AreEqual(expected.Count == 0, actual.IsEmpty());
    }
}
/// <summary>
/// Prepares the fixture: a four-user, four-item preference data set and an
/// <see cref="ALSWRFactorizer"/> built on top of it.
/// </summary>
public override void SetUp() {
    base.SetUp();

    var prefsOfUser1 = new List<IPreference>() {
        new GenericPreference(1L, 1L, 5.0f),
        new GenericPreference(1L, 2L, 5.0f),
        new GenericPreference(1L, 3L, 2.0f)
    };
    var prefsOfUser2 = new List<IPreference>() {
        new GenericPreference(2L, 1L, 2.0f),
        new GenericPreference(2L, 3L, 3.0f),
        new GenericPreference(2L, 4L, 5.0f)
    };
    var prefsOfUser3 = new List<IPreference>() {
        new GenericPreference(3L, 2L, 5.0f),
        new GenericPreference(3L, 4L, 3.0f)
    };
    var prefsOfUser4 = new List<IPreference>() {
        new GenericPreference(4L, 1L, 3.0f),
        new GenericPreference(4L, 4L, 5.0f)
    };

    var prefsByUser = new FastByIDMap<IPreferenceArray>();
    prefsByUser.Put(1L, new GenericUserPreferenceArray(prefsOfUser1));
    prefsByUser.Put(2L, new GenericUserPreferenceArray(prefsOfUser2));
    prefsByUser.Put(3L, new GenericUserPreferenceArray(prefsOfUser3));
    prefsByUser.Put(4L, new GenericUserPreferenceArray(prefsOfUser4));

    dataModel = new GenericDataModel(prefsByUser);
    factorizer = new ALSWRFactorizer(dataModel, 3, 0.065, 10);
}
/// <summary>
/// Configures the fixture for the toy scenario: rank 3, lambda 0.01, 1000 iterations and a
/// four-user, four-item preference data set.
/// </summary>
public void setUpToyData() {
    this.rank = 3;
    this.lambda = 0.01;
    this.numIterations = 1000;

    var toyPrefs = new FastByIDMap<IPreferenceArray>();

    var user1 = new List<IPreference>();
    user1.Add(new GenericPreference(1L, 1L, 5.0f));
    user1.Add(new GenericPreference(1L, 2L, 5.0f));
    user1.Add(new GenericPreference(1L, 3L, 2.0f));
    toyPrefs.Put(1L, new GenericUserPreferenceArray(user1));

    var user2 = new List<IPreference>();
    user2.Add(new GenericPreference(2L, 1L, 2.0f));
    user2.Add(new GenericPreference(2L, 3L, 3.0f));
    user2.Add(new GenericPreference(2L, 4L, 5.0f));
    toyPrefs.Put(2L, new GenericUserPreferenceArray(user2));

    var user3 = new List<IPreference>();
    user3.Add(new GenericPreference(3L, 2L, 5.0f));
    user3.Add(new GenericPreference(3L, 4L, 3.0f));
    toyPrefs.Put(3L, new GenericUserPreferenceArray(user3));

    var user4 = new List<IPreference>();
    user4.Add(new GenericPreference(4L, 1L, 3.0f));
    user4.Add(new GenericPreference(4L, 4L, 5.0f));
    toyPrefs.Put(4L, new GenericUserPreferenceArray(user4));

    dataModel = new GenericDataModel(toyPrefs);
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences). This
/// <see cref="IDataModel"/> retains all this information in memory and is effectively immutable.
/// </summary>
/// <remarks>
/// Construction sorts each user's preference array by item (in place), builds the per-item
/// preference arrays (each sorted by user), records the minimum and maximum preference value
/// seen, and stores sorted arrays of all item IDs and all user IDs.
/// </remarks>
/// <param name="userData">users to include, mapped to their preferences; must not be null
/// (see also the <c>ToDataMap</c> overload taking a map of preference lists and a <c>byUser</c> flag)</param>
/// <param name="timestamps">optionally, timestamps of preferences: user IDs are mapped to maps of item IDs to
/// nullable <see cref="DateTime"/> values. Stored as given, without inspection.</param>
/// <exception cref="ArgumentNullException">if <paramref name="userData"/> is null</exception>
public GenericDataModel(FastByIDMap <IPreferenceArray> userData, FastByIDMap <FastByIDMap <DateTime?> > timestamps) {
    // Fail fast with a descriptive error instead of a NullReferenceException further down.
    if (userData == null) {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    FastByIDMap <IList <IPreference> > prefsForItems = new FastByIDMap <IList <IPreference> >();
    FastIDSet itemIDSet = new FastIDSet();
    int currentCount = 0;
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;

    // Single pass over all users: group preferences by item and track the value range.
    foreach (var entry in preferenceFromUsers.EntrySet()) {
        IPreferenceArray prefs = entry.Value;
        prefs.SortByItem();
        foreach (IPreference preference in prefs) {
            long itemID = preference.GetItemID();
            itemIDSet.Add(itemID);
            var prefsForItem = prefsForItems.Get(itemID);
            if (prefsForItem == null) {
                prefsForItem = new List <IPreference>(2);
                prefsForItems.Put(itemID, prefsForItem);
            }
            prefsForItem.Add(preference);
            float value = preference.GetValue();
            if (value > maxPrefValue) {
                maxPrefValue = value;
            }
            if (value < minPrefValue) {
                minPrefValue = value;
            }
        }
        // Progress log every 10000 users.
        if (++currentCount % 10000 == 0) {
            log.Info("Processed {0} users", currentCount);
        }
    }
    log.Info("Processed {0} users", currentCount);

    // With no preferences at all these remain at their infinite initial values.
    setMinPreference(minPrefValue);
    setMaxPreference(maxPrefValue);

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.preferenceForItems = ToDataMap(prefsForItems, false);
    foreach (var entry in preferenceForItems.EntrySet()) {
        entry.Value.SortByUser();
    }

    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (var v in userData.Keys) {
        userIDs[i++] = v;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Checks that <c>GetUserIDs</c> yields only the delegate model's user, even after an anonymous
/// user has been taken and given temporary preferences.
/// </summary>
public void testGetUserIDs() {
    long sampleUserID = 1;
    long sampleItemID = 11;

    // Delegate model: a single real user with a single preference.
    IPreferenceArray realPrefs = new GenericUserPreferenceArray(1);
    realPrefs.SetUserID(0, sampleUserID);
    realPrefs.SetItemID(0, sampleItemID);
    var delegatePreferences = new FastByIDMap<IPreferenceArray>();
    delegatePreferences.Put(sampleUserID, realPrefs);

    PlusAnonymousConcurrentUserDataModel instance = getTestableWithDelegateData(10, delegatePreferences);

    // Anonymous user with one temporary preference.
    long anonymousUserID = instance.TakeAvailableUser().Value;
    IPreferenceArray anonymousPrefs = new GenericUserPreferenceArray(1);
    anonymousPrefs.SetUserID(0, anonymousUserID);
    anonymousPrefs.SetItemID(0, 22);
    instance.SetTempPrefs(anonymousPrefs, anonymousUserID);

    var userIDs = instance.GetUserIDs();
    userIDs.MoveNext();
    Assert.AreEqual(sampleUserID, userIDs.Current);
    Assert.False(userIDs.MoveNext());
}
/// <summary>
/// Checks per-item preference lookups and user counts when one real user (items 11 and 22)
/// is combined with one anonymous user holding temporary preferences (items 11 and 33).
/// </summary>
public void testGetPreferencesForItem() {
    long sampleUserID = 4;
    long sampleItemID = 11;
    long sampleItemID2 = 22;
    long sampleItemID3 = 33;

    // Real user: items 11 and 22.
    IPreferenceArray realPrefs = new GenericUserPreferenceArray(2);
    realPrefs.SetUserID(0, sampleUserID);
    realPrefs.SetItemID(0, sampleItemID);
    realPrefs.SetUserID(1, sampleUserID);
    realPrefs.SetItemID(1, sampleItemID2);

    var delegatePreferences = new FastByIDMap<IPreferenceArray>();
    delegatePreferences.Put(sampleUserID, realPrefs);
    PlusAnonymousConcurrentUserDataModel instance = getTestableWithDelegateData(10, delegatePreferences);

    // Anonymous user: items 11 and 33.
    long anonymousUserID = instance.TakeAvailableUser().Value;
    IPreferenceArray anonymousPrefs = new GenericUserPreferenceArray(2);
    anonymousPrefs.SetUserID(0, anonymousUserID);
    anonymousPrefs.SetItemID(0, sampleItemID);
    anonymousPrefs.SetUserID(1, anonymousUserID);
    anonymousPrefs.SetItemID(1, sampleItemID3);
    instance.SetTempPrefs(anonymousPrefs, anonymousUserID);

    // Item 11 is shared by both users; items 22 and 33 each belong to one user.
    Assert.AreEqual(sampleUserID, instance.GetPreferencesForItem(sampleItemID).Get(0).GetUserID());
    Assert.AreEqual(2, instance.GetPreferencesForItem(sampleItemID).Length());
    Assert.AreEqual(1, instance.GetPreferencesForItem(sampleItemID2).Length());
    Assert.AreEqual(1, instance.GetPreferencesForItem(sampleItemID3).Length());

    Assert.AreEqual(2, instance.GetNumUsersWithPreferenceFor(sampleItemID));
    Assert.AreEqual(1, instance.GetNumUsersWithPreferenceFor(sampleItemID, sampleItemID2));
    Assert.AreEqual(1, instance.GetNumUsersWithPreferenceFor(sampleItemID, sampleItemID3));
}
/// <summary>Exports the user IDs of a data model together with each user's item IDs.</summary>
/// <param name="dataModel">model to read users and their item IDs from</param>
/// <returns>a <see cref="FastByIDMap{T}"/> mapping each user ID to the <see cref="FastIDSet"/>
/// returned by the model for that user's items</returns>
public static FastByIDMap<FastIDSet> toDataMap(IDataModel dataModel) {
    var itemsByUser = new FastByIDMap<FastIDSet>(dataModel.GetNumUsers());
    for (var users = dataModel.GetUserIDs(); users.MoveNext(); ) {
        long currentUser = users.Current;
        FastIDSet items = dataModel.GetItemIDsFromUser(currentUser);
        itemsByUser.Put(currentUser, items);
    }
    return itemsByUser;
}
/// <summary>Exports the user IDs of a data model together with each user's preferences.</summary>
/// <param name="dataModel">model to read users and their preferences from</param>
/// <returns>a <see cref="FastByIDMap{T}"/> mapping each user ID to the <see cref="IPreferenceArray"/>
/// returned by the model for that user</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(IDataModel dataModel) {
    var prefsByUser = new FastByIDMap<IPreferenceArray>(dataModel.GetNumUsers());
    for (var users = dataModel.GetUserIDs(); users.MoveNext(); ) {
        long currentUser = users.Current;
        IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(currentUser);
        prefsByUser.Put(currentUser, userPrefs);
    }
    return prefsByUser;
}
/// <summary>
/// Assigns consecutive zero-based indexes to the IDs produced by an enumerator, in enumeration order.
/// </summary>
/// <param name="size">initial size passed to the created map</param>
/// <param name="idIterator">enumerator supplying the IDs; it is advanced until exhausted</param>
/// <returns>a map from each ID to its index</returns>
private static FastByIDMap<int?> createIDMapping(int size, IEnumerator<long> idIterator) {
    FastByIDMap<int?> indexByID = new FastByIDMap<int?>(size);
    for (int position = 0; idIterator.MoveNext(); position++) {
        indexByID.Put(idIterator.Current, position);
    }
    return indexByID;
}
/// <summary>Checks that a key reads as null before insertion and returns the stored value afterwards.</summary>
public void testPutAndGet() {
    const long key = 500000L;
    var map = new FastByIDMap<long?>();

    Assert.IsNull(map.Get(key));

    map.Put(key, 2L);
    long stored = (long) map.Get(key);
    Assert.AreEqual(2L, stored);
}
/// <summary>
/// Reads a <see cref="Factorization"/> from a binary stream. The layout consumed is: feature count,
/// user count and item count (three Int32 values), followed by one record per user and then one
/// record per item, each record being an Int32 index, an Int64 ID and one Double per feature.
/// </summary>
/// <param name="inFile">stream to read from; the reader created here is not disposed by this method</param>
/// <returns>the factorization assembled from the ID mappings and feature rows that were read</returns>
public static Factorization readBinary(Stream inFile) {
    var reader = new BinaryReader(inFile);
    int numFeatures = reader.ReadInt32();
    int numUsers = reader.ReadInt32();
    int numItems = reader.ReadInt32();

    var userIDMapping = new FastByIDMap<int?>(numUsers);
    double[][] userFeatures = readFeatureRows(reader, numUsers, numFeatures, userIDMapping);

    var itemIDMapping = new FastByIDMap<int?>(numItems);
    double[][] itemFeatures = readFeatureRows(reader, numItems, numFeatures, itemIDMapping);

    return new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);
}

/// <summary>
/// Reads <paramref name="rowCount"/> records (index, ID, feature values), storing each feature
/// row at the index given in its record and registering ID -> index in <paramref name="idMapping"/>.
/// </summary>
private static double[][] readFeatureRows(BinaryReader reader, int rowCount, int numFeatures, FastByIDMap<int?> idMapping) {
    double[][] rows = new double[rowCount][];
    for (int n = 0; n < rowCount; n++) {
        int rowIndex = reader.ReadInt32();
        long id = reader.ReadInt64();
        // Rows are placed by the index stored in the record, not by read order.
        rows[rowIndex] = new double[numFeatures];
        idMapping.Put(id, rowIndex);
        for (int feature = 0; feature < numFeatures; feature++) {
            rows[rowIndex][feature] = reader.ReadDouble();
        }
    }
    return rows;
}
/// <summary>
/// Splits a single user's preferences at random into training and test preferences.
/// </summary>
/// <param name="trainingPercentage">a preference is assigned to training when its random draw is below this value</param>
/// <param name="trainingPrefs">map that receives the user's training preferences, if any</param>
/// <param name="testPrefs">map that receives the user's test preferences; filled only when the user also has training preferences</param>
/// <param name="userID">user whose preferences are split</param>
/// <param name="dataModel">model the user's preferences are read from</param>
private void splitOneUsersPrefs(double trainingPercentage, FastByIDMap<IPreferenceArray> trainingPrefs, FastByIDMap<IPreferenceArray> testPrefs, long userID, IDataModel dataModel) {
    List<IPreference> forTraining = null;
    List<IPreference> forTest = null;
    IPreferenceArray source = dataModel.GetPreferencesFromUser(userID);
    int total = source.Length();

    int position = 0;
    while (position < total) {
        IPreference copied = new GenericPreference(userID, source.GetItemID(position), source.GetValue(position));
        if (random.nextDouble() >= trainingPercentage) {
            // Test side; the list is created on first use.
            if (forTest == null) {
                forTest = new List<IPreference>(3);
            }
            forTest.Add(copied);
        } else {
            // Training side; the list is created on first use.
            if (forTraining == null) {
                forTraining = new List<IPreference>(3);
            }
            forTraining.Add(copied);
        }
        position++;
    }

    // A user without any training preference contributes nothing, not even test preferences.
    if (forTraining == null) {
        return;
    }
    trainingPrefs.Put(userID, new GenericUserPreferenceArray(forTraining));
    if (forTest != null) {
        testPrefs.Put(userID, new GenericUserPreferenceArray(forTest));
    }
}
/// <summary>Checks <c>Count</c> and <c>IsEmpty</c> on a fresh map, after one insertion, and after removing it again.</summary>
public void testSizeEmpty() {
    const long key = 500000L;
    var map = new FastByIDMap<long>();

    // Fresh map.
    Assert.AreEqual(0, map.Count());
    Assert.True(map.IsEmpty());

    // One entry.
    map.Put(key, 2L);
    Assert.AreEqual(1, map.Count());
    Assert.False(map.IsEmpty());

    // Back to empty.
    map.Remove(key);
    Assert.AreEqual(0, map.Count());
    Assert.True(map.IsEmpty());
}
/// <summary>
/// Checks that the shuffler, after shuffle and stage, exposes each preference of the synthetic
/// data model exactly once and with the value the model reports.
/// </summary>
public void testPreferenceShufflerWithSyntheticData() {
    setUpSyntheticData();

    var shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
    shuffler.shuffle();
    shuffler.stage();

    // user ID -> item ID -> true, for every preference the shuffler hands out.
    var seen = new FastByIDMap<FastByIDMap<bool?>>();
    for (int i = 0; i < shuffler.size(); i++) {
        IPreference pref = shuffler.get(i);

        float? value = dataModel.GetPreferenceValue(pref.GetUserID(), pref.GetItemID());
        Assert.AreEqual(pref.GetValue(), value.Value, 0.0);

        if (!seen.ContainsKey(pref.GetUserID())) {
            seen.Put(pref.GetUserID(), new FastByIDMap<bool?>());
        }
        // A non-null entry here would mean the shuffler returned the same preference twice.
        Assert.IsNull(seen.Get(pref.GetUserID()).Get(pref.GetItemID()));
        seen.Get(pref.GetUserID()).Put(pref.GetItemID(), true);
    }

    // Every preference in the model must have been seen, and the totals must match.
    int index = 0;
    for (var userIDs = dataModel.GetUserIDs(); userIDs.MoveNext(); ) {
        IPreferenceArray preferencesFromUser = dataModel.GetPreferencesFromUser(userIDs.Current);
        foreach (IPreference preference in preferencesFromUser) {
            Assert.True(seen.Get(preference.GetUserID()).Get(preference.GetItemID()).Value);
            index++;
        }
    }
    Assert.AreEqual(index, shuffler.size());
}
/// <summary>
/// Reduces a map of preference arrays to a map of item-ID sets, keeping the same keys.
/// </summary>
/// <param name="data">preference arrays keyed by ID</param>
/// <returns>a new map in which each key points to the set of item IDs found in its preference array</returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data) {
    var itemSetsByKey = new FastByIDMap<FastIDSet>(data.Count());
    foreach (var pair in data.EntrySet()) {
        IPreferenceArray prefs = pair.Value;
        int count = prefs.Length();
        var ids = new FastIDSet(count);
        int idx = 0;
        while (idx < count) {
            ids.Add(prefs.GetItemID(idx));
            idx++;
        }
        itemSetsByKey.Put(pair.Key, ids);
    }
    return itemSetsByKey;
}