/// <summary>
/// Groups a flat list of user/item records by user and wraps the result in a
/// <see cref="GenericDataModel"/>.
/// </summary>
/// <param name="userItems">Records to convert; each one contributes a single preference.</param>
/// <param name="isReviewBased">
/// When <c>true</c>, every record becomes a <see cref="GenericPreference"/> carrying its rating and the
/// per-user arrays are <see cref="GenericUserPreferenceArray"/>s; otherwise boolean preferences and
/// <see cref="BooleanUserPreferenceArray"/>s are used.
/// </param>
/// <returns>A new <see cref="GenericDataModel"/> built from the per-user preference arrays.</returns>
internal static IDataModel BuildModel(IList<UserItem> userItems, bool isReviewBased)
{
    // Pass 1: bucket one preference per record under its user ID.
    var prefsByUser = new FastByIDMap<IList<IPreference>>();
    foreach (var record in userItems)
    {
        IList<IPreference> bucket = prefsByUser.Get(record.UserId);
        if (bucket == null)
        {
            bucket = new List<IPreference>(3);
            prefsByUser.Put(record.UserId, bucket);
        }

        IPreference pref;
        if (isReviewBased)
        {
            pref = new GenericPreference(record.UserId, record.ItemId, record.Rating);
        }
        else
        {
            pref = new BooleanPreference(record.UserId, record.ItemId);
        }
        bucket.Add(pref);
    }

    // Pass 2: turn every bucket into the matching preference-array type.
    var arraysByUser = new FastByIDMap<IPreferenceArray>(prefsByUser.Count());
    foreach (var pair in prefsByUser.EntrySet())
    {
        var bucket = (List<IPreference>)pair.Value;
        IPreferenceArray array;
        if (isReviewBased)
        {
            array = new GenericUserPreferenceArray(bucket);
        }
        else
        {
            array = new BooleanUserPreferenceArray(bucket);
        }
        arraysByUser.Put(pair.Key, array);
    }

    return new GenericDataModel(arraysByUser);
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from the given per-user preference arrays and
/// derives the per-item view, the sorted item and user ID arrays, and the minimum/maximum
/// preference values from them.
/// </summary>
/// <param name="userData">
/// Preference arrays keyed by user ID. The map is stored as-is and each array is sorted by item
/// as a side effect of construction.
/// </param>
/// <param name="timestamps">
/// Per-user maps of item ID to timestamp. Stored as given; this constructor does not read it.
/// </param>
public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;

    var itemToPrefs = new FastByIDMap<IList<IPreference>>();
    var seenItemIDs = new FastIDSet();
    float lowest = float.PositiveInfinity;
    float highest = float.NegativeInfinity;
    int usersDone = 0;

    foreach (var userEntry in preferenceFromUsers.EntrySet())
    {
        IPreferenceArray userPrefs = userEntry.Value;
        userPrefs.SortByItem();

        foreach (IPreference pref in userPrefs)
        {
            long itemID = pref.GetItemID();
            seenItemIDs.Add(itemID);

            // Invert the mapping: collect every preference under its item.
            IList<IPreference> bucket = itemToPrefs.Get(itemID);
            if (bucket == null)
            {
                bucket = new List<IPreference>(2);
                itemToPrefs.Put(itemID, bucket);
            }
            bucket.Add(pref);

            // Plain comparisons on purpose: a NaN value leaves both bounds untouched.
            float value = pref.GetValue();
            if (value > highest)
            {
                highest = value;
            }
            if (value < lowest)
            {
                lowest = value;
            }
        }

        usersDone++;
        if (usersDone % 10000 == 0)
        {
            log.Info("Processed {0} users", usersDone);
        }
    }
    log.Info("Processed {0} users", usersDone);

    setMinPreference(lowest);
    setMaxPreference(highest);

    this.itemIDs = seenItemIDs.ToArray();
    seenItemIDs = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.preferenceForItems = ToDataMap(itemToPrefs, false);
    foreach (var itemEntry in preferenceForItems.EntrySet())
    {
        itemEntry.Value.SortByUser();
    }

    this.userIDs = new long[userData.Count()];
    int next = 0;
    foreach (var userID in userData.Keys)
    {
        userIDs[next] = userID;
        next++;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Builds a new map with the same keys as <paramref name="data"/>, wrapping each preference list
/// in a preference array. No entries are added to or removed from <paramref name="data"/>.
/// </summary>
/// <param name="data">Preference lists keyed by ID.</param>
/// <param name="byUser">
/// <c>true</c> to wrap each list in a <see cref="GenericUserPreferenceArray"/>; <c>false</c> to
/// wrap it in a <see cref="GenericItemPreferenceArray"/>.
/// </param>
/// <returns>A newly allocated map of preference arrays.</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data, bool byUser)
{
    var converted = new FastByIDMap<IPreferenceArray>(data.Count());
    foreach (var pair in data.EntrySet())
    {
        IPreferenceArray array;
        if (byUser)
        {
            array = new GenericUserPreferenceArray(pair.Value);
        }
        else
        {
            array = new GenericItemPreferenceArray(pair.Value);
        }
        converted.Put(pair.Key, array);
    }
    return converted;
}
/// <summary>
/// Reduces each preference array to the set of item IDs it contains.
/// </summary>
/// <param name="data">Preference arrays keyed by ID; keys are carried over unchanged.</param>
/// <returns>A new map from each key of <paramref name="data"/> to the item IDs of its array.</returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data)
{
    var itemSets = new FastByIDMap<FastIDSet>(data.Count());
    foreach (var pair in data.EntrySet())
    {
        IPreferenceArray prefs = pair.Value;
        int count = prefs.Length();

        // Pre-size the set to the number of preferences in the array.
        var ids = new FastIDSet(count);
        int index = 0;
        while (index < count)
        {
            ids.Add(prefs.GetItemID(index));
            index++;
        }

        itemSets.Put(pair.Key, ids);
    }
    return itemSets;
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users and the item IDs
/// each of them is associated with, and derives the inverse item-to-users map plus sorted arrays
/// of all item IDs and user IDs.
/// </summary>
/// <param name="userData">Item ID sets keyed by user ID. The map is stored as-is.</param>
/// <param name="timestamps">
/// Per-user maps of item ID to timestamp. Stored as given; this constructor does not read it.
/// </param>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);

        // Invert the mapping: record this user under every item they are associated with.
        // foreach (rather than a hand-driven enumerator) also disposes the enumerator.
        foreach (long itemID in itemIDs1)
        {
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (long id in userData.Keys)
    {
        userIDs[i++] = id;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Queues one estimation task per test user, runs the tasks through <c>execute</c>, and returns
/// the value of <c>computeFinalEvaluation()</c>.
/// </summary>
/// <param name="testPrefs">Held-out preferences keyed by user ID.</param>
/// <param name="recommender">Recommender asked to estimate each held-out preference.</param>
/// <returns>The result of <c>computeFinalEvaluation()</c> after all tasks have been executed.</returns>
private double getEvaluation(FastByIDMap<IPreferenceArray> testPrefs, IRecommender recommender)
{
    reset();
    var estimateCallables = new List<Action>();
    AtomicInteger noEstimateCounter = new AtomicInteger();

    foreach (var entry in testPrefs.EntrySet())
    {
        estimateCallables.Add(() =>
        {
            var testUserID = entry.Key;
            var prefs = entry.Value;
            foreach (IPreference realPref in prefs)
            {
                // NaN marks "no estimate available"; it stays NaN if the recommender throws.
                float estimatedPreference = float.NaN;
                try
                {
                    estimatedPreference = recommender.EstimatePreference(testUserID, realPref.GetItemID());
                }
                catch (NoSuchUserException)
                {
                    // A user can exist in the test data but not in the training data.
                    // Just ignore it and move on.
                    log.Info("User exists in test data but not training data: {0}", testUserID);
                }
                catch (NoSuchItemException)
                {
                    // Likewise for an item that only appears in the test data.
                    log.Info("Item exists in test data but not training data: {0}", realPref.GetItemID());
                }

                if (float.IsNaN(estimatedPreference))
                {
                    noEstimateCounter.incrementAndGet();
                }
                else
                {
                    estimatedPreference = capEstimatedPreference(estimatedPreference);
                    processOneEstimate(estimatedPreference, realPref);
                }
            }
        });
    }

    log.Info("Beginning evaluation of {0} users", estimateCallables.Count);
    IRunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
    execute(estimateCallables, noEstimateCounter, timing);
    return computeFinalEvaluation();
}
/// <summary>
/// Executes <c>SelectCmd</c>, reads one preference per row, and returns the rows as a
/// <see cref="GenericDataModel"/>.
/// </summary>
/// <remarks>
/// User and item IDs are read from the <c>UserIdFld</c> and <c>ItemIdFld</c> columns. When
/// <c>PrefValFld</c> is non-empty its column supplies the preference value; otherwise boolean
/// preferences are created.
/// </remarks>
/// <returns>A new <see cref="GenericDataModel"/> over the loaded preferences.</returns>
public IDataModel Load()
{
    bool withValues = !String.IsNullOrEmpty(PrefValFld);
    var prefsByUser = new FastByIDMap<IList<IPreference>>();

    using (var reader = SelectCmd.ExecuteReader())
    {
        while (reader.Read())
        {
            long userID = Convert.ToInt64(reader[UserIdFld]);
            long itemID = Convert.ToInt64(reader[ItemIdFld]);

            IList<IPreference> bucket = prefsByUser.Get(userID);
            if (bucket == null)
            {
                bucket = new List<IPreference>(3);
                prefsByUser.Put(userID, bucket);
            }

            IPreference pref;
            if (withValues)
            {
                float value = Convert.ToSingle(reader[PrefValFld]);
                pref = new GenericPreference(userID, itemID, value);
            }
            else
            {
                pref = new BooleanPreference(userID, itemID);
            }
            bucket.Add(pref);
        }
    }

    // Convert each user's list into the array type that matches the preference kind.
    var arraysByUser = new FastByIDMap<IPreferenceArray>(prefsByUser.Count());
    foreach (var pair in prefsByUser.EntrySet())
    {
        var bucket = (List<IPreference>)pair.Value;
        IPreferenceArray array;
        if (withValues)
        {
            array = new GenericUserPreferenceArray(bucket);
        }
        else
        {
            array = new BooleanUserPreferenceArray(bucket);
        }
        arraysByUser.Put(pair.Key, array);
    }

    return new GenericDataModel(arraysByUser);
}
/// <summary>
/// Returns the recommender data model, building it from all movie ratings and caching it under
/// the key <c>"RecommenderDataModel"</c> on a cache miss.
/// </summary>
/// <returns>
/// The cached model when present; otherwise the newly built model, which is the same instance
/// that was just stored in the cache.
/// </returns>
private IDataModel GetDataModel()
{
    var cacheKey = "RecommenderDataModel";
    IDataModel dataModel = _memoryCache.Get<IDataModel>(cacheKey);
    if (dataModel != null)
    {
        return dataModel;
    }

    // Bucket one boolean preference per rating under its user ID.
    var movieRatings = _unitOfWork.MovieRatingRepository.GetAll();
    FastByIDMap<IList<IPreference>> data = new FastByIDMap<IList<IPreference>>();
    foreach (var movieRating in movieRatings)
    {
        var userPreferences = data.Get(movieRating.UserId);
        if (userPreferences == null)
        {
            userPreferences = new List<IPreference>(3);
            data.Put(movieRating.UserId, userPreferences);
        }
        userPreferences.Add(new BooleanPreference(movieRating.UserId, movieRating.MovieId));
    }

    var newData = new FastByIDMap<IPreferenceArray>(data.Count());
    foreach (var entry in data.EntrySet())
    {
        var prefList = (List<IPreference>)entry.Value;
        newData.Put(entry.Key, (IPreferenceArray)new BooleanUserPreferenceArray(prefList));
    }

    dataModel = new GenericDataModel(newData);
    _memoryCache.Set(cacheKey, dataModel);

    // Return the instance that was cached instead of constructing a second, identical model.
    return dataModel;
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users and their item ID
/// sets. Also builds the reverse lookup (item ID to the users associated with it) and sorted
/// arrays of every item ID and user ID.
/// </summary>
/// <param name="userData">Item ID sets keyed by user ID. The map is kept by reference.</param>
/// <param name="timestamps">
/// Per-user maps of item ID to timestamp. Kept by reference; not inspected by this constructor.
/// </param>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);

        // Register this user under each of their items. Using foreach instead of driving the
        // enumerator manually guarantees the enumerator is disposed.
        foreach (long itemID in itemIDs1)
        {
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (long id in userData.Keys)
    {
        userIDs[i++] = id;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Builds one estimation task per user in <paramref name="testPrefs"/>, hands the tasks to
/// <c>execute</c>, and returns whatever <c>computeFinalEvaluation()</c> reports afterwards.
/// </summary>
/// <param name="testPrefs">Held-out preferences keyed by user ID.</param>
/// <param name="recommender">Recommender whose estimates are compared with the held-out values.</param>
/// <returns>The result of <c>computeFinalEvaluation()</c>.</returns>
private double getEvaluation(FastByIDMap<IPreferenceArray> testPrefs, IRecommender recommender)
{
    reset();
    var estimateCallables = new List<Action>();
    AtomicInteger noEstimateCounter = new AtomicInteger();

    foreach (var entry in testPrefs.EntrySet())
    {
        estimateCallables.Add(() =>
        {
            var testUserID = entry.Key;
            var prefs = entry.Value;
            foreach (IPreference realPref in prefs)
            {
                // Starts as NaN so that a thrown exception is counted as "no estimate".
                float estimatedPreference = float.NaN;
                try
                {
                    estimatedPreference = recommender.EstimatePreference(testUserID, realPref.GetItemID());
                }
                catch (NoSuchUserException)
                {
                    // The user may be present only in the test data; skip and carry on.
                    log.Info("User exists in test data but not training data: {0}", testUserID);
                }
                catch (NoSuchItemException)
                {
                    // The item may be present only in the test data; skip and carry on.
                    log.Info("Item exists in test data but not training data: {0}", realPref.GetItemID());
                }

                if (float.IsNaN(estimatedPreference))
                {
                    noEstimateCounter.incrementAndGet();
                }
                else
                {
                    estimatedPreference = capEstimatedPreference(estimatedPreference);
                    processOneEstimate(estimatedPreference, realPref);
                }
            }
        });
    }

    log.Info("Beginning evaluation of {0} users", estimateCallables.Count);
    IRunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
    execute(estimateCallables, noEstimateCounter, timing);
    return computeFinalEvaluation();
}
/// <summary>
/// Converts a map of preference arrays into a map of item ID sets, one set per key.
/// </summary>
/// <param name="data">Preference arrays keyed by ID.</param>
/// <returns>
/// A new map with the same keys as <paramref name="data"/>; each value holds the item IDs found
/// in the corresponding array.
/// </returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data)
{
    var result = new FastByIDMap<FastIDSet>(data.Count());
    foreach (var pair in data.EntrySet())
    {
        IPreferenceArray array = pair.Value;
        int length = array.Length();
        var collected = new FastIDSet(length);

        int position = 0;
        while (position < length)
        {
            collected.Add(array.GetItemID(position));
            position++;
        }

        result.Put(pair.Key, collected);
    }
    return result;
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from per-user preference arrays. During
/// construction the per-item preference arrays, the sorted item and user ID arrays, and the
/// smallest and largest preference values are computed.
/// </summary>
/// <param name="userData">
/// Preference arrays keyed by user ID. The map is kept by reference, and every array in it is
/// sorted by item while the model is being built.
/// </param>
/// <param name="timestamps">
/// Per-user maps of item ID to timestamp. Kept by reference; not inspected by this constructor.
/// </param>
public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    //Preconditions.checkArgument(userData != null, "userData is null");
    this.preferenceFromUsers = userData;

    var prefsPerItem = new FastByIDMap<IList<IPreference>>();
    var allItemIDs = new FastIDSet();
    float minSeen = float.PositiveInfinity;
    float maxSeen = float.NegativeInfinity;
    int processedUsers = 0;

    foreach (var userPair in preferenceFromUsers.EntrySet())
    {
        IPreferenceArray arrayForUser = userPair.Value;
        arrayForUser.SortByItem();

        foreach (IPreference current in arrayForUser)
        {
            long itemID = current.GetItemID();
            allItemIDs.Add(itemID);

            // Append the preference to its item's list, creating the list on first use.
            IList<IPreference> listForItem = prefsPerItem.Get(itemID);
            if (listForItem == null)
            {
                listForItem = new List<IPreference>(2);
                prefsPerItem.Put(itemID, listForItem);
            }
            listForItem.Add(current);

            // Explicit comparisons keep the original NaN behavior (NaN never updates a bound).
            float prefValue = current.GetValue();
            if (prefValue > maxSeen)
            {
                maxSeen = prefValue;
            }
            if (prefValue < minSeen)
            {
                minSeen = prefValue;
            }
        }

        processedUsers += 1;
        if (processedUsers % 10000 == 0)
        {
            log.Info("Processed {0} users", processedUsers);
        }
    }
    log.Info("Processed {0} users", processedUsers);

    setMinPreference(minSeen);
    setMaxPreference(maxSeen);

    this.itemIDs = allItemIDs.ToArray();
    allItemIDs = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.preferenceForItems = ToDataMap(prefsPerItem, false);
    foreach (var itemPair in preferenceForItems.EntrySet())
    {
        itemPair.Value.SortByUser();
    }

    this.userIDs = new long[userData.Count()];
    int slot = 0;
    foreach (var key in userData.Keys)
    {
        userIDs[slot] = key;
        slot += 1;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Converts preference lists to preference arrays while withholding one preference: the first
/// preference for <paramref name="itemId"/> found in the list stored under <paramref name="userId"/>.
/// </summary>
/// <remarks>
/// Side effects: the static <c>preferenceFromUsersRemoved</c> map is replaced on every call and
/// receives the withheld preference (keyed by <paramref name="userId"/>) when one is found, and
/// that preference is removed from the caller's list inside <paramref name="data"/>.
/// </remarks>
/// <param name="data">Preference lists keyed by ID.</param>
/// <param name="userId">Key of the entry from which a preference is withheld.</param>
/// <param name="itemId">Item ID of the preference to withhold.</param>
/// <param name="byUser">
/// <c>true</c> to build <see cref="GenericUserPreferenceArray"/>s; <c>false</c> to build
/// <see cref="GenericItemPreferenceArray"/>s.
/// </param>
/// <returns>A newly allocated map of preference arrays.</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data, long userId, long itemId, bool byUser)
{
    var newData = new FastByIDMap<IPreferenceArray>(data.Count());
    preferenceFromUsersRemoved = new FastByIDMap<IPreference>();

    foreach (var entry in data.EntrySet())
    {
        var prefList = entry.Value;
        if (entry.Key == userId)
        {
            // Look the preference up once, and only record/remove it when it actually exists,
            // so no null value is stored and Remove(null) is never called.
            IPreference removed = prefList.FirstOrDefault(i => i.GetItemID() == itemId);
            if (removed != null)
            {
                preferenceFromUsersRemoved.Put(userId, removed);
                prefList.Remove(removed);
            }
        }

        newData.Put(entry.Key, byUser
            ? (IPreferenceArray)new GenericUserPreferenceArray(prefList)
            : new GenericItemPreferenceArray(prefList));
    }

    return newData;
}
/// <summary>
/// Wraps every preference list of <paramref name="data"/> in a preference array and returns the
/// arrays in a newly created map; <paramref name="data"/> itself gets no entries added or removed.
/// </summary>
/// <param name="data">Preference lists keyed by ID.</param>
/// <param name="byUser">
/// Selects the array type: <see cref="GenericUserPreferenceArray"/> when <c>true</c>,
/// <see cref="GenericItemPreferenceArray"/> when <c>false</c>.
/// </param>
/// <returns>A new map holding one preference array per key of <paramref name="data"/>.</returns>
public static FastByIDMap<IPreferenceArray> ToDataMap(FastByIDMap<IList<IPreference>> data, bool byUser)
{
    var arrays = new FastByIDMap<IPreferenceArray>(data.Count());
    foreach (var item in data.EntrySet())
    {
        IPreferenceArray wrapped;
        if (byUser)
        {
            wrapped = new GenericUserPreferenceArray(item.Value);
        }
        else
        {
            wrapped = new GenericItemPreferenceArray(item.Value);
        }
        arrays.Put(item.Key, wrapped);
    }
    return arrays;
}
/// <summary>
/// Exposes the entries of <c>userIDMapping</c>.
/// </summary>
/// <returns>The key/value pairs returned by <c>userIDMapping.EntrySet()</c>.</returns>
public IEnumerable<KeyValuePair<long, int?>> getUserIDMappings()
{
    IEnumerable<KeyValuePair<long, int?>> mappings = userIDMapping.EntrySet();
    return mappings;
}