/// <summary>
/// Scores two users by the overlap of their item ID sets: the size of the intersection
/// divided by the size of the union.
/// </summary>
/// <param name="userID1">ID of the first user.</param>
/// <param name="userID2">ID of the second user.</param>
/// <returns>
/// <c>double.NaN</c> when both users have no items or when they share no item,
/// <c>0.0</c> when exactly one of them has no items, otherwise intersection / union.
/// </returns>
public double userSimilarity(long userID1, long userID2)
{
    DataModel dataModel = base.getDataModel();
    FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
    FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
    int firstCount = firstItems.size();
    int secondCount = secondItems.size();

    if (firstCount == 0 || secondCount == 0)
    {
        // Two empty users are incomparable; a single empty user simply has nothing in common.
        if (firstCount == 0 && secondCount == 0)
        {
            return double.NaN;
        }
        return 0.0;
    }

    // The larger set is always the receiver and the smaller one the argument.
    int shared;
    if (firstCount < secondCount)
    {
        shared = secondItems.intersectionSize(firstItems);
    }
    else
    {
        shared = firstItems.intersectionSize(secondItems);
    }

    if (shared == 0)
    {
        return double.NaN;
    }

    int unionCount = firstCount + secondCount - shared;
    return (double)shared / (double)unionCount;
}
/// <summary>
/// Adds the preferences of <paramref name="otherUserID"/> to the training data. For the user
/// under evaluation, preferences on the relevant (held-out) items are left out first.
/// </summary>
/// <param name="userID">User currently being evaluated.</param>
/// <param name="relevantItemIDs">Item IDs that must not appear in that user's training data.</param>
/// <param name="trainingUsers">Map receiving the training preferences, keyed by user ID.</param>
/// <param name="otherUserID">User whose preferences are being added.</param>
/// <param name="dataModel">Source of the preferences.</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(otherUserID);

    if (userID != otherUserID)
    {
        // Any other user contributes every preference unchanged.
        trainingUsers.Put(otherUserID, allPrefs);
        return;
    }

    // The evaluated user: keep only preferences on items that are not held out.
    List<IPreference> retained = new List<IPreference>(allPrefs.Length());
    foreach (IPreference candidate in allPrefs)
    {
        if (relevantItemIDs.Contains(candidate.GetItemID()))
        {
            continue;
        }
        retained.Add(candidate);
    }

    // A user left with no preferences is not added at all.
    if (retained.Count > 0)
    {
        trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(retained));
    }
}
/// <summary>
/// For every user of the recommender's data model, compares the recommender's output with the
/// user's own preferences (sorted via <c>SortByValueReversed</c>) and feeds the score returned
/// by <c>scoreCommonSubset</c> into <paramref name="tracker"/>.
/// </summary>
/// <param name="recommender">Recommender under evaluation; also supplies the user IDs.</param>
/// <param name="model">Model supplying the item count and each user's preferences.</param>
/// <param name="samples">Upper bound passed to <c>setBits</c> and applied to the common-item count.</param>
/// <param name="tracker">Running average receiving one datum per scored user.</param>
/// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
public static void Evaluate(IRecommender recommender, IDataModel model, int samples, IRunningAverage tracker, String tag)
{
    printHeader();

    var userCursor = recommender.GetDataModel().GetUserIDs();
    while (userCursor.MoveNext())
    {
        long userID = userCursor.Current;

        var recommended = recommender.Recommend(userID, model.GetNumItems());
        IPreferenceArray ownPrefs = model.GetPreferencesFromUser(userID);
        ownPrefs.SortByValueReversed();

        FastIDSet commonSet = new FastIDSet();
        long maxItemID = setBits(commonSet, recommended, samples);
        FastIDSet otherSet = new FastIDSet();
        maxItemID = Math.Max(maxItemID, setBits(otherSet, ownPrefs, samples));

        int max = Math.Min(mask(commonSet, otherSet, maxItemID), samples);
        if (max >= 2)
        {
            // Only users with at least two common items are scored.
            long[] recommendedItems = getCommonItems(commonSet, recommended, max);
            long[] preferredItems = getCommonItems(commonSet, ownPrefs, max);
            double variance = scoreCommonSubset(tag, userID, samples, max, recommendedItems, preferredItems);
            tracker.AddDatum(variance);
        }
    }
}
/// <summary>
/// Checks that an ID is reported as absent before it is added and as present afterwards.
/// </summary>
public void testContainsAndAdd()
{
    var ids = new FastIDSet();

    // Fresh set: nothing is contained yet.
    Assert.False(ids.Contains(1));

    ids.Add(1);

    Assert.True(ids.Contains(1));
}
/// <summary>
/// Returns the smallest user-to-user similarity found between members of the two clusters.
/// Users of <paramref name="cluster1"/> are visited through an iterator that may be wrapped for
/// sampling; every visited user is compared with every user of <paramref name="cluster2"/>.
/// </summary>
/// <param name="cluster1">First cluster of user IDs.</param>
/// <param name="cluster2">Second cluster of user IDs.</param>
/// <returns>
/// <c>Double.NaN</c> if either cluster is empty; otherwise the least similarity seen, or, when no
/// comparison lowered the initial value, the similarity of the first user of each cluster.
/// </returns>
public double getSimilarity(FastIDSet cluster1, FastIDSet cluster2)
{
    if (cluster1.isEmpty() || cluster2.isEmpty())
    {
        return Double.NaN;
    }

    double leastSimilarity = Double.PositiveInfinity;
    var someUsers = SamplingLongPrimitiveIterator.maybeWrapIterator(cluster1.GetEnumerator(), samplingRate);
    while (someUsers.MoveNext())
    {
        long userID1 = someUsers.Current;
        var it2 = cluster2.GetEnumerator();
        while (it2.MoveNext())
        {
            // A NaN similarity never satisfies '<', so it is ignored here.
            double theSimilarity = similarity.userSimilarity(userID1, it2.Current);
            if (theSimilarity < leastSimilarity)
            {
                leastSimilarity = theSimilarity;
            }
        }
    }

    // We skipped everything? Well, at least try comparing the first users to get some value.
    if (leastSimilarity == Double.PositiveInfinity)
    {
        // An enumerator must be advanced with MoveNext() before Current is valid.
        var first1 = cluster1.GetEnumerator();
        var first2 = cluster2.GetEnumerator();
        if (!first1.MoveNext() || !first2.MoveNext())
        {
            // Not expected because both clusters were checked as non-empty above.
            return Double.NaN;
        }
        return similarity.userSimilarity(first1.Current, first2.Current);
    }

    return leastSimilarity;
}
/// <summary>
/// Builds the model from per-user preference arrays: sorts each user's preferences by item,
/// groups preferences per item, records the minimum and maximum preference value, and fills
/// the sorted item ID and user ID arrays.
/// </summary>
/// <param name="userData">Preference arrays keyed by user ID; stored by reference.</param>
/// <param name="timestamps">Per-user maps of item ID to timestamp; stored as given.</param>
public GenericDataModel(FastByIDMap<PreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    this.preferenceFromUsers = userData;

    FastByIDMap<List<Preference>> prefsByItem = new FastByIDMap<List<Preference>>();
    FastIDSet seenItemIDs = new FastIDSet();
    int processedUsers = 0;
    float highest = float.NegativeInfinity;
    float lowest = float.PositiveInfinity;

    foreach (KeyValuePair<long, PreferenceArray> userEntry in this.preferenceFromUsers.entrySet())
    {
        PreferenceArray userPrefs = userEntry.Value;
        userPrefs.sortByItem();

        foreach (Preference pref in userPrefs)
        {
            long itemID = pref.getItemID();
            seenItemIDs.add(itemID);

            // Create the per-item bucket the first time the item is seen.
            List<Preference> bucket = prefsByItem.get(itemID);
            if (bucket == null)
            {
                bucket = new List<Preference>(2);
                prefsByItem.put(itemID, bucket);
            }
            bucket.Add(pref);

            float value = pref.getValue();
            if (value > highest)
            {
                highest = value;
            }
            if (value < lowest)
            {
                lowest = value;
            }
        }

        // Progress message every 10000 users.
        processedUsers++;
        if (processedUsers % 10000 == 0)
        {
            log.info("Processed {0} users", new object[] { processedUsers });
        }
    }
    log.info("Processed {0} users", new object[] { processedUsers });

    this.setMinPreference(lowest);
    this.setMaxPreference(highest);

    this.itemIDs = seenItemIDs.toArray();
    seenItemIDs = null;
    Array.Sort<long>(this.itemIDs);

    this.preferenceForItems = toDataMap(prefsByItem, false);
    foreach (KeyValuePair<long, PreferenceArray> itemEntry in this.preferenceForItems.entrySet())
    {
        itemEntry.Value.sortByUser();
    }

    this.userIDs = new long[userData.size()];
    int next = 0;
    foreach (long userID in userData.Keys)
    {
        this.userIDs[next] = userID;
        next++;
    }
    Array.Sort<long>(this.userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// For each user of the recommender's data model, compares the recommended items with the
/// user's own preferences (sorted via <c>sortByValueReversed</c>) and adds the value returned by
/// <c>scoreCommonSubset</c> to <paramref name="tracker"/>.
/// </summary>
/// <param name="recommender">Recommender under evaluation; also supplies the user IDs.</param>
/// <param name="model">Model supplying the item count and user preferences.</param>
/// <param name="samples">Upper bound passed to <c>setBits</c> and applied to the common-item count.</param>
/// <param name="tracker">Running average receiving one datum per scored user.</param>
/// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
public static void evaluate(Recommender recommender, DataModel model, int samples, RunningAverage tracker, string tag)
{
    printHeader();

    IEnumerator<long> userCursor = recommender.getDataModel().getUserIDs();
    while (userCursor.MoveNext())
    {
        long userID = userCursor.Current;

        List<RecommendedItem> recommended = recommender.recommend(userID, model.getNumItems());
        PreferenceArray ownPrefs = model.getPreferencesFromUser(userID);
        ownPrefs.sortByValueReversed();

        FastIDSet commonSet = new FastIDSet();
        long maxItemID = setBits(commonSet, recommended, samples);
        FastIDSet otherSet = new FastIDSet();
        maxItemID = Math.Max(maxItemID, setBits(otherSet, ownPrefs, samples));

        int max = mask(commonSet, otherSet, maxItemID);
        max = Math.Min(max, samples);
        if (max < 2)
        {
            // Fewer than two common items: nothing to score for this user.
            continue;
        }

        long[] recommendedItems = getCommonItems(commonSet, recommended, max);
        long[] preferredItems = getCommonItems(commonSet, ownPrefs, max);
        double score = scoreCommonSubset(tag, userID, samples, max, recommendedItems, preferredItems);
        tracker.addDatum(score);
    }
}
/// <summary>
/// Scans pairs of clusters and returns the pair with the highest non-NaN similarity. When
/// <c>samplingRate</c> is below 1.0, each pair is only examined with that probability.
/// </summary>
/// <param name="clusters">Clusters to compare pairwise.</param>
/// <returns>
/// The most similar examined pair, or a default pair (null key and value) when no pair
/// produced a usable similarity.
/// </returns>
private KeyValuePair<FastIDSet, FastIDSet> findNearestClusters(List<FastIDSet> clusters)
{
    KeyValuePair<FastIDSet, FastIDSet> best = default(KeyValuePair<FastIDSet, FastIDSet>);
    double bestScore = Double.NegativeInfinity;
    int count = clusters.Count;

    for (int first = 0; first < count; first++)
    {
        FastIDSet left = clusters[first];
        for (int second = first + 1; second < count; second++)
        {
            // The random draw only happens when sampling is actually enabled.
            bool examine = samplingRate >= 1.0 || random.nextDouble() < samplingRate;
            if (!examine)
            {
                continue;
            }

            FastIDSet right = clusters[second];
            double score = clusterSimilarity.getSimilarity(left, right);
            if (Double.IsNaN(score) || score <= bestScore)
            {
                continue;
            }

            bestScore = score;
            best = new KeyValuePair<FastIDSet, FastIDSet>(left, right);
        }
    }

    return best;
}
/// <summary>
/// Rebuilds the cluster state. Starts with one single-user cluster per user, merges them via
/// <c>findClusters</c> when there is more than one user, then derives the per-user
/// recommendations and per-user cluster lookup. With no users, all three fields are set to
/// empty values.
/// </summary>
private void buildClusters()
{
    DataModel dataModel = getDataModel();
    int userCount = dataModel.getNumUsers();

    if (userCount <= 0)
    {
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        allClusters = NO_CLUSTERS;
        return;
    }

    // Begin with a cluster for each user.
    List<FastIDSet> clusters = new List<FastIDSet>();
    var userCursor = dataModel.getUserIDs();
    while (userCursor.MoveNext())
    {
        FastIDSet singleton = new FastIDSet();
        singleton.add(userCursor.Current);
        clusters.Add(singleton);
    }

    // Merging only makes sense with at least two users.
    if (userCount > 1)
    {
        findClusters(clusters);
    }

    topRecsByUserID = computeTopRecsPerUserID(clusters);
    clustersByUserID = computeClustersPerUserID(clusters);
    allClusters = clusters.ToArray();
}
/// <summary>
/// Scores two users by the overlap of their item ID sets: intersection size divided by
/// union size.
/// </summary>
/// <param name="userID1">ID of the first user.</param>
/// <param name="userID2">ID of the second user.</param>
/// <returns>
/// <c>Double.NaN</c> when both users have no items or share none, <c>0.0</c> when exactly one
/// has no items, otherwise intersection / union.
/// </returns>
public double UserSimilarity(long userID1, long userID2)
{
    IDataModel model = getDataModel();
    FastIDSet itemsOfFirst = model.GetItemIDsFromUser(userID1);
    FastIDSet itemsOfSecond = model.GetItemIDsFromUser(userID2);
    int countFirst = itemsOfFirst.Count();
    int countSecond = itemsOfSecond.Count();

    bool firstEmpty = countFirst == 0;
    bool secondEmpty = countSecond == 0;
    if (firstEmpty && secondEmpty)
    {
        return Double.NaN;
    }
    if (firstEmpty || secondEmpty)
    {
        return 0.0;
    }

    // The larger set is the receiver, the smaller one the argument.
    int overlap;
    if (countFirst < countSecond)
    {
        overlap = itemsOfSecond.IntersectionSize(itemsOfFirst);
    }
    else
    {
        overlap = itemsOfFirst.IntersectionSize(itemsOfSecond);
    }
    if (overlap == 0)
    {
        return Double.NaN;
    }

    int combined = countFirst + countSecond - overlap;
    return (double)overlap / (double)combined;
}
/// <summary>
/// With items {1, 2, 3} in the model and a user who already prefers item 2,
/// <c>AllUnknownItemsCandidateItemsStrategy</c> must yield exactly items 1 and 3.
/// </summary>
public void testStrategy()
{
    // Arrange: the full catalogue exposed by the mocked data model.
    FastIDSet catalogue = new FastIDSet();
    catalogue.AddAll(new long[] { 1L, 2L, 3L });

    // Built as in the original test; not referenced by the assertions below.
    FastIDSet preferredItemIDs = new FastIDSet(1);
    preferredItemIDs.Add(2L);

    var modelMock = new DynamicMock(typeof(IDataModel));
    modelMock.ExpectAndReturn("GetNumItems", 3);
    modelMock.ExpectAndReturn("GetItemIDs", catalogue.GetEnumerator());

    List<IPreference> userPrefs = new List<IPreference>() { new GenericPreference(123L, 2L, 1.0f) };
    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(userPrefs);
    ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

    // Act
    IDataModel model = (IDataModel)modelMock.MockInstance;
    FastIDSet candidates = strategy.GetCandidateItems(123L, prefArrayOfUser123, model);

    // Assert
    Assert.AreEqual(2, candidates.Count());
    Assert.True(candidates.Contains(1L));
    Assert.True(candidates.Contains(3L));
    modelMock.Verify();
}
/// <summary>
/// Rebuilds the clusters and returns the cluster recorded for the given user.
/// </summary>
/// <param name="userID">User whose cluster is requested.</param>
/// <returns>The user's cluster, or a new empty set when none is recorded.</returns>
public FastIDSet getCluster(long userID)
{
    buildClusters();

    FastIDSet found = clustersByUserID.get(userID);
    if (found == null)
    {
        return new FastIDSet();
    }
    return found;
}
/// <summary>
/// Rebuilds the cluster state. Starts with one single-user cluster per user and repeatedly calls
/// <c>mergeClosestClusters</c> until it reports completion, then derives the per-user
/// recommendations, the per-user cluster lookup and the cluster array. With no users only the
/// two maps are reset; <c>allClusters</c> is left untouched.
/// </summary>
private void buildClusters()
{
    DataModel dataModel = getDataModel();
    int userCount = dataModel.getNumUsers();

    if (userCount == 0)
    {
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        return;
    }

    // Begin with a cluster for each user.
    List<FastIDSet> working = new List<FastIDSet>();
    var userCursor = dataModel.getUserIDs();
    while (userCursor.MoveNext())
    {
        FastIDSet singleton = new FastIDSet();
        singleton.add(userCursor.Current);
        working.Add(singleton);
    }

    // At least one merge attempt is always made; 'finished' is false at every call.
    bool finished = false;
    do
    {
        finished = mergeClosestClusters(userCount, working, finished);
    }
    while (!finished);

    topRecsByUserID = computeTopRecsPerUserID(working);
    clustersByUserID = computeClustersPerUserID(working);
    allClusters = working.ToArray();
}
/// <summary>
/// Collects up to <paramref name="numUsers"/> cluster pairs with a non-NaN similarity in a
/// bounded priority queue and returns them sorted.
/// </summary>
/// <remarks>
/// Once the queue is full, the head is taken out and compared with the candidate. The better of
/// the two goes back in, so the queue size stays constant. The previous code popped the head
/// inside the comparison and never restored it, which silently dropped pairs.
/// NOTE(review): assumes <c>Pop()</c> removes and returns the queue head (presumably the least
/// similar retained pair) — confirm against the PriorityQueue implementation.
/// </remarks>
/// <param name="numUsers">Maximum number of pairs to retain.</param>
/// <param name="clusters">Clusters to compare pairwise.</param>
/// <returns>The retained pairs, ordered by <c>List.Sort()</c>.</returns>
private List<ClusterClusterPair> findClosestClusters(int numUsers, List<FastIDSet> clusters)
{
    PriorityQueue<ClusterClusterPair> queue = new PriorityQueue<ClusterClusterPair>(numUsers + 1, new ClusterClusterPair());
    int size = clusters.Count;

    for (int i = 0; i < size; i++)
    {
        FastIDSet cluster1 = clusters[i];
        for (int j = i + 1; j < size; j++)
        {
            FastIDSet cluster2 = clusters[j];
            double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
            if (Double.IsNaN(similarity))
            {
                continue;
            }

            if (queue.Count < numUsers)
            {
                // Still room: keep every comparable pair.
                queue.Push(new ClusterClusterPair(cluster1, cluster2, similarity));
                continue;
            }

            // Queue is full: replace the head only when the candidate beats it.
            ClusterClusterPair head = queue.Pop();
            if (similarity > head.getSimilarity())
            {
                queue.Push(new ClusterClusterPair(cluster1, cluster2, similarity));
            }
            else
            {
                queue.Push(head);
            }
        }
    }

    List<ClusterClusterPair> result = queue.ToList();
    result.Sort();
    return result;
}
/// <summary>
/// For every user of the first recommender's data model, compares the recommendations of the
/// two recommenders and feeds the value returned by <c>scoreCommonSubset</c> into
/// <paramref name="tracker"/>.
/// </summary>
/// <param name="recommender1">First recommender; also supplies the user IDs.</param>
/// <param name="recommender2">Second recommender.</param>
/// <param name="samples">Number of recommendations requested and upper bound on compared items.</param>
/// <param name="tracker">Running average receiving one datum per scored user.</param>
/// <param name="tag">Label forwarded to <c>scoreCommonSubset</c>.</param>
public static void Evaluate(IRecommender recommender1, IRecommender recommender2, int samples, IRunningAverage tracker, String tag)
{
    printHeader();

    var userCursor = recommender1.GetDataModel().GetUserIDs();
    while (userCursor.MoveNext())
    {
        long userID = userCursor.Current;
        var firstRecs = recommender1.Recommend(userID, samples);
        var secondRecs = recommender2.Recommend(userID, samples);

        FastIDSet commonSet = new FastIDSet();
        long maxItemID = setBits(commonSet, firstRecs, samples);
        FastIDSet otherSet = new FastIDSet();
        maxItemID = Math.Max(maxItemID, setBits(otherSet, secondRecs, samples));

        int max = Math.Min(mask(commonSet, otherSet, maxItemID), samples);
        if (max >= 2)
        {
            // Only users with at least two common items are scored.
            long[] firstItems = getCommonItems(commonSet, firstRecs, max);
            long[] secondItems = getCommonItems(commonSet, secondRecs, max);
            double variance = scoreCommonSubset(tag, userID, samples, max, firstItems, secondItems);
            tracker.AddDatum(variance);
        }
    }
}
/// <summary>
/// <c>AllUnknownItemsCandidateItemsStrategy</c> must return every model item the user has no
/// preference for: with items {1, 2, 3} and a preference for item 2, that is {1, 3}.
/// </summary>
public void testStrategy()
{
    // Arrange
    FastIDSet everyItem = new FastIDSet();
    everyItem.AddAll(new long[] { 1L, 2L, 3L });

    // Built as in the original test; not referenced by the assertions below.
    FastIDSet preferredItemIDs = new FastIDSet(1);
    preferredItemIDs.Add(2L);

    var mockedModel = new DynamicMock(typeof(IDataModel));
    mockedModel.ExpectAndReturn("GetNumItems", 3);
    mockedModel.ExpectAndReturn("GetItemIDs", everyItem.GetEnumerator());

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(
        new List<IPreference>() { new GenericPreference(123L, 2L, 1.0f) });
    ICandidateItemsStrategy strategy = new AllUnknownItemsCandidateItemsStrategy();

    // Act
    FastIDSet result = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)mockedModel.MockInstance);

    // Assert
    Assert.AreEqual(2, result.Count());
    Assert.True(result.Contains(1L));
    Assert.True(result.Contains(3L));
    mockedModel.Verify();
}
/// <summary>
/// User 123 prefers item 1; user 456 prefers items 1 and 2.
/// <c>PreferredItemsNeighborhoodCandidateItemsStrategy</c> must propose exactly item 2 for
/// user 123.
/// </summary>
public void testStrategy()
{
    // Arrange: item sets per user.
    FastIDSet itemsOf123 = new FastIDSet();
    itemsOf123.Add(1L);
    FastIDSet itemsOf456 = new FastIDSet();
    itemsOf456.Add(1L);
    itemsOf456.Add(2L);

    // Both users have a preference on item 1.
    List<IPreference> item1Prefs = new List<IPreference>();
    item1Prefs.Add(new GenericPreference(123L, 1L, 1.0f));
    item1Prefs.Add(new GenericPreference(456L, 1L, 1.0f));
    IPreferenceArray preferencesForItem1 = new GenericItemPreferenceArray(item1Prefs);

    var modelMock = new DynamicMock(typeof(IDataModel));
    modelMock.ExpectAndReturn("GetPreferencesForItem", preferencesForItem1, (1L));
    modelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOf123, (123L));
    modelMock.ExpectAndReturn("GetItemIDsFromUser", itemsOf456, (456L));

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(
        new List<IPreference>() { new GenericPreference(123L, 1L, 1.0f) });
    ICandidateItemsStrategy strategy = new PreferredItemsNeighborhoodCandidateItemsStrategy();

    // Act
    FastIDSet candidates = strategy.GetCandidateItems(123L, prefArrayOfUser123, (IDataModel)modelMock.MockInstance);

    // Assert
    Assert.AreEqual(1, candidates.Count());
    Assert.True(candidates.Contains(2L));
    modelMock.Verify();
}
/// <summary>
/// With every sampling limit set to 1, <c>SamplingCandidateItemsStrategy</c> must return at
/// most one candidate for user 123 and never item 1, which the user already prefers.
/// </summary>
public void testStrategy()
{
    // Arrange: three users, all of whom have a preference on item 1.
    List<IPreference> prefsOfUser123 = new List<IPreference>
    {
        new GenericPreference(123L, 1L, 1.0f)
    };
    List<IPreference> prefsOfUser456 = new List<IPreference>
    {
        new GenericPreference(456L, 1L, 1.0f),
        new GenericPreference(456L, 2L, 1.0f)
    };
    List<IPreference> prefsOfUser789 = new List<IPreference>
    {
        new GenericPreference(789L, 1L, 0.5f),
        new GenericPreference(789L, 3L, 1.0f)
    };

    IPreferenceArray prefArrayOfUser123 = new GenericUserPreferenceArray(prefsOfUser123);
    FastByIDMap<IPreferenceArray> usersToPrefs = new FastByIDMap<IPreferenceArray>();
    usersToPrefs.Put(123L, prefArrayOfUser123);
    usersToPrefs.Put(456L, new GenericUserPreferenceArray(prefsOfUser456));
    usersToPrefs.Put(789L, new GenericUserPreferenceArray(prefsOfUser789));
    IDataModel model = new GenericDataModel(usersToPrefs);

    ICandidateItemsStrategy strategy =
        new SamplingCandidateItemsStrategy(1, 1, 1, model.GetNumUsers(), model.GetNumItems());

    // Act
    FastIDSet candidates = strategy.GetCandidateItems(123L, prefArrayOfUser123, model);

    // Assert
    Assert.True(candidates.Count() <= 1);
    Assert.False(candidates.Contains(1L));
}
/// <summary>
/// Asks the most-similar-items candidate strategy for candidates for the given items, then
/// ranks them with <paramref name="estimator"/> through <c>TopItems.GetTopItems</c>.
/// </summary>
/// <param name="itemIDs">Items for which similar items are wanted.</param>
/// <param name="howMany">Count forwarded to <c>TopItems.GetTopItems</c>.</param>
/// <param name="estimator">Estimator forwarded to <c>TopItems.GetTopItems</c>.</param>
/// <returns>The list produced by <c>TopItems.GetTopItems</c>.</returns>
private List<IRecommendedItem> doMostSimilarItems(long[] itemIDs, int howMany, TopItems.IEstimator<long> estimator)
{
    FastIDSet candidates = mostSimilarItemsCandidateItemsStrategy.GetCandidateItems(itemIDs, GetDataModel());
    var candidateCursor = candidates.GetEnumerator();

    // The third argument is passed as null, exactly as before.
    return TopItems.GetTopItems(howMany, candidateCursor, null, estimator);
}
/// <summary>
/// A set created with size argument 1 must still hold two IDs after both are added.
/// </summary>
public void testGrow()
{
    var ids = new FastIDSet(1);

    ids.Add(1);
    ids.Add(2);

    Assert.True(ids.Contains(1));
    Assert.True(ids.Contains(2));
}
/// <summary>
/// Collects the IDs that <c>similarity.AllSimilarItemIDs</c> reports for each preferred item and
/// removes the preferred items themselves from the result.
/// </summary>
/// <param name="preferredItemIDs">Items the candidates are derived from.</param>
/// <param name="dataModel">Not used by this implementation.</param>
/// <returns>The collected similar item IDs minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
{
    FastIDSet collected = new FastIDSet();

    for (int index = 0; index < preferredItemIDs.Length; index++)
    {
        long[] similarIDs = similarity.AllSimilarItemIDs(preferredItemIDs[index]);
        collected.AddAll(similarIDs);
    }

    collected.RemoveAll(preferredItemIDs);
    return collected;
}
/// <summary>
/// Creates a new <see cref="GenericDataModel"/> from the given users (and their preferences).
/// The given maps are stored by reference. Each user's preference array is sorted by item,
/// preferences are regrouped per item (sorted by user), and the minimum and maximum preference
/// values are recorded.
/// </summary>
/// <param name="userData">Users to include, as preference arrays keyed by user ID (see also <c>ToDataMap</c>).</param>
/// <param name="timestamps">Optional timestamps of preferences: user IDs mapped to maps of item ID to <see cref="DateTime"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="userData"/> is null.</exception>
public GenericDataModel(FastByIDMap<IPreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    // Fail fast with a descriptive error instead of a NullReferenceException further down.
    if (userData == null)
    {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    FastByIDMap<IList<IPreference>> prefsForItems = new FastByIDMap<IList<IPreference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int currentCount = 0;
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        IPreferenceArray prefs = entry.Value;
        prefs.SortByItem();
        foreach (IPreference preference in prefs)
        {
            long itemID = preference.GetItemID();
            itemIDSet.Add(itemID);

            // Create the per-item list the first time the item is seen.
            var prefsForItem = prefsForItems.Get(itemID);
            if (prefsForItem == null)
            {
                prefsForItem = new List<IPreference>(2);
                prefsForItems.Put(itemID, prefsForItem);
            }
            prefsForItem.Add(preference);

            float value = preference.GetValue();
            if (value > maxPrefValue)
            {
                maxPrefValue = value;
            }
            if (value < minPrefValue)
            {
                minPrefValue = value;
            }
        }

        // Progress message every 10000 users.
        if (++currentCount % 10000 == 0)
        {
            log.Info("Processed {0} users", currentCount);
        }
    }
    log.Info("Processed {0} users", currentCount);

    setMinPreference(minPrefValue);
    setMaxPreference(maxPrefValue);

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.preferenceForItems = ToDataMap(prefsForItems, false);
    foreach (var entry in preferenceForItems.EntrySet())
    {
        entry.Value.SortByUser();
    }

    this.userIDs = new long[userData.Count()];
    int i = 0;
    foreach (var v in userData.Keys)
    {
        userIDs[i++] = v;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Returns every item ID of the data model except the preferred ones.
/// </summary>
/// <param name="preferredItemIDs">Items to exclude from the result.</param>
/// <param name="dataModel">Model supplying the item count and all item IDs.</param>
/// <returns>All item IDs of <paramref name="dataModel"/> minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
{
    // The item count is passed as the set's initial size argument.
    int itemCount = dataModel.GetNumItems();
    FastIDSet remaining = new FastIDSet(itemCount);

    var itemCursor = dataModel.GetItemIDs();
    while (itemCursor.MoveNext())
    {
        long itemID = itemCursor.Current;
        remaining.Add(itemID);
    }

    remaining.RemoveAll(preferredItemIDs);
    return remaining;
}
/// <summary>
/// After <c>Clear()</c> the set must report zero elements, be empty, and no longer contain the
/// previously added ID.
/// </summary>
public void testClear()
{
    var ids = new FastIDSet();
    ids.Add(1);

    ids.Clear();

    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());
    Assert.False(ids.Contains(1));
}
/// <summary>
/// Gathers the item IDs of every user in the neighborhood and removes the items of
/// <paramref name="theUserID"/>.
/// </summary>
/// <param name="theNeighborhood">IDs of the neighboring users.</param>
/// <param name="theUserID">User whose own items are excluded.</param>
/// <returns>Items held by the neighbors but not by the user.</returns>
protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID)
{
    IDataModel model = GetDataModel();
    FastIDSet gathered = new FastIDSet();

    for (int n = 0; n < theNeighborhood.Length; n++)
    {
        FastIDSet neighborItems = model.GetItemIDsFromUser(theNeighborhood[n]);
        gathered.AddAll(neighborItems);
    }

    FastIDSet ownItems = model.GetItemIDsFromUser(theUserID);
    gathered.RemoveAll(ownItems);
    return gathered;
}
/// <summary>
/// Runs the base preparation, then allocates the <c>p</c> (one row per user) and <c>y</c>
/// (one row per item) matrices and builds the user-index to item-index lookup.
/// In each row, the entries before <c>RatingSGDFactorizer.FEATURE_OFFSET</c> are set to 0 and
/// the remaining ones to Gaussian noise scaled by <c>randomNoise</c>.
/// </summary>
protected override void prepareTraining()
{
    base.prepareTraining();
    RandomWrapper random = RandomUtils.getRandom();

    // p: one row per user.
    this.p = new double[base.dataModel.getNumUsers()][];
    for (int row = 0; row < this.p.Length; row++)
    {
        this.p[row] = new double[base.numFeatures];
        for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
        {
            this.p[row][feature] = 0.0;
        }
        for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
        {
            this.p[row][feature] = random.nextGaussian() * base.randomNoise;
        }
    }

    // y: one row per item, initialised the same way (after p, so the random sequence is unchanged).
    this.y = new double[base.dataModel.getNumItems()][];
    for (int row = 0; row < this.y.Length; row++)
    {
        this.y[row] = new double[base.numFeatures];
        for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
        {
            this.y[row][feature] = 0.0;
        }
        for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
        {
            this.y[row][feature] = random.nextGaussian() * base.randomNoise;
        }
    }

    // Lookup from user index to the indices of the items that user has.
    this.itemsByUser = new Dictionary<int, List<int>>();
    IEnumerator<long> userCursor = base.dataModel.getUserIDs();
    while (userCursor.MoveNext())
    {
        long userID = userCursor.Current;
        int userIdx = base.userIndex(userID);
        FastIDSet userItems = base.dataModel.getItemIDsFromUser(userID);

        List<int> itemIndices = new List<int>(userItems.size());
        this.itemsByUser[userIdx] = itemIndices;
        foreach (long itemID in userItems)
        {
            itemIndices.Add(base.itemIndex(itemID));
        }
    }
}
/// <summary>
/// Looks up the item ID set stored for the given user.
/// </summary>
/// <param name="userID">User to look up.</param>
/// <returns>The stored item ID set of the user.</returns>
/// <exception cref="NoSuchUserException">No entry exists for <paramref name="userID"/>.</exception>
public override FastIDSet GetItemIDsFromUser(long userID)
{
    FastIDSet stored = preferenceFromUsers.Get(userID);
    if (stored != null)
    {
        return stored;
    }

    throw new NoSuchUserException(userID);
}
/// <summary>
/// Looks up the item ID set stored for the given user.
/// </summary>
/// <param name="userID">User to look up.</param>
/// <returns>The stored item ID set of the user.</returns>
/// <exception cref="NoSuchUserException">No entry exists for <paramref name="userID"/>.</exception>
public override FastIDSet getItemIDsFromUser(long userID)
{
    FastIDSet stored = this.preferenceFromUsers.get(userID);
    if (stored != null)
    {
        return stored;
    }

    throw new NoSuchUserException(userID);
}
/// <summary>
/// Computes the similarity of two users from the sizes of their item sets and of the
/// intersection, delegating the actual formula to <c>doSimilarity</c>.
/// </summary>
/// <param name="userID1">ID of the first user.</param>
/// <param name="userID2">ID of the second user.</param>
/// <returns>The value returned by <c>doSimilarity</c>.</returns>
public double UserSimilarity(long userID1, long userID2)
{
    IDataModel model = getDataModel();
    FastIDSet itemsOfFirst = model.GetItemIDsFromUser(userID1);
    FastIDSet itemsOfSecond = model.GetItemIDsFromUser(userID2);
    int countFirst = itemsOfFirst.Count();
    int countSecond = itemsOfSecond.Count();

    // The larger set is the receiver, the smaller one the argument.
    int overlap;
    if (countFirst < countSecond)
    {
        overlap = itemsOfSecond.IntersectionSize(itemsOfFirst);
    }
    else
    {
        overlap = itemsOfFirst.IntersectionSize(itemsOfSecond);
    }

    return doSimilarity(countFirst, countSecond, overlap);
}
/// <summary>
/// Repeatedly merges the nearest pair of clusters in place. In threshold mode merging goes on
/// while the nearest pair's similarity is at least <c>clusteringThreshold</c>; otherwise it
/// goes on while there are more than <c>numClusters</c> clusters. Either mode stops as soon as
/// <c>findNearestClusters</c> yields no pair.
/// </summary>
/// <param name="newClusters">Cluster list that is modified in place.</param>
private void findClusters(List<FastIDSet> newClusters)
{
    if (!clusteringByThreshold)
    {
        // Count-based mode: merge until the target number of clusters is reached.
        while (newClusters.Count > numClusters)
        {
            KeyValuePair<FastIDSet, FastIDSet> closest = findNearestClusters(newClusters);
            FastIDSet left = closest.Key;
            FastIDSet right = closest.Value;
            if (left == null || right == null)
            {
                break;
            }

            newClusters.Remove(left);
            newClusters.Remove(right);
            FastIDSet combined = new FastIDSet(left.size() + right.size());
            combined.addAll(left);
            combined.addAll(right);
            newClusters.Add(combined);
        }
        return;
    }

    // Threshold mode: merge while the nearest pair is similar enough.
    KeyValuePair<FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
    FastIDSet first = nearest.Key;
    FastIDSet second = nearest.Value;
    while (first != null && second != null
           && clusterSimilarity.getSimilarity(first, second) >= clusteringThreshold)
    {
        newClusters.Remove(first);
        newClusters.Remove(second);
        FastIDSet combined = new FastIDSet(first.size() + second.size());
        combined.addAll(first);
        combined.addAll(second);
        newClusters.Add(combined);

        nearest = findNearestClusters(newClusters);
        first = nearest.Key;
        second = nearest.Value;
    }
}
/// <summary>
/// Collects the IDs that <c>similarity.allSimilarItemIDs</c> reports for each preferred item and
/// removes the preferred items themselves from the result.
/// </summary>
/// <param name="preferredItemIDs">Items the candidates are derived from.</param>
/// <param name="dataModel">Not used by this implementation.</param>
/// <returns>The collected similar item IDs minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel)
{
    FastIDSet collected = new FastIDSet();

    for (int index = 0; index < preferredItemIDs.Length; index++)
    {
        long preferredID = preferredItemIDs[index];
        collected.addAll(this.similarity.allSimilarItemIDs(preferredID));
    }

    collected.removeAll(preferredItemIDs);
    return collected;
}
/// <summary>
/// Records <paramref name="toItemID"/> in the index entry of <paramref name="fromItemID"/>,
/// creating that entry on first use.
/// </summary>
/// <param name="fromItemID">Key of the index entry.</param>
/// <param name="toItemID">ID added to that entry.</param>
private void doIndex(long fromItemID, long toItemID)
{
    FastIDSet targets = similarItemIDsIndex.Get(fromItemID);

    bool firstEntry = targets == null;
    if (firstEntry)
    {
        targets = new FastIDSet();
        similarItemIDsIndex.Put(fromItemID, targets);
    }

    targets.Add(toItemID);
}
/// <summary>
/// Returns every item ID of the data model whose similarity to <paramref name="itemID"/> is
/// not NaN.
/// </summary>
/// <param name="itemID">Item to compare all other items against.</param>
/// <returns>IDs of the items with a non-NaN similarity, as an array.</returns>
public virtual long[] AllSimilarItemIDs(long itemID)
{
    FastIDSet comparable = new FastIDSet();

    var itemCursor = dataModel.GetItemIDs();
    while (itemCursor.MoveNext())
    {
        long otherID = itemCursor.Current;
        double score = ItemSimilarity(itemID, otherID);
        if (Double.IsNaN(score))
        {
            continue;
        }
        comparable.Add(otherID);
    }

    return comparable.ToArray();
}
/// <summary>
/// Builds the candidate set from the items that <c>similarity.AllSimilarItemIDs</c> returns for
/// each preferred item, excluding the preferred items themselves.
/// </summary>
/// <param name="preferredItemIDs">Items the candidates are derived from.</param>
/// <param name="dataModel">Not used by this implementation.</param>
/// <returns>The collected similar item IDs minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
{
    FastIDSet result = new FastIDSet();

    int position = 0;
    while (position < preferredItemIDs.Length)
    {
        long preferredID = preferredItemIDs[position];
        result.AddAll(similarity.AllSimilarItemIDs(preferredID));
        position++;
    }

    result.RemoveAll(preferredItemIDs);
    return result;
}
/// <summary>
/// Records <paramref name="toItemID"/> in the index entry of <paramref name="fromItemID"/>,
/// creating that entry on first use.
/// </summary>
/// <param name="fromItemID">Key of the index entry.</param>
/// <param name="toItemID">ID added to that entry.</param>
private void doIndex(long fromItemID, long toItemID)
{
    FastIDSet targets = this.similarItemIDsIndex.get(fromItemID);

    bool firstEntry = targets == null;
    if (firstEntry)
    {
        targets = new FastIDSet();
        this.similarItemIDsIndex.put(fromItemID, targets);
    }

    targets.add(toItemID);
}
/// <summary>
/// Stores temporary preferences for an anonymous user together with the set of item IDs those
/// preferences cover.
/// </summary>
/// <param name="prefs">Preferences to store.</param>
/// <param name="anonymousUserID">Key under which both entries are stored.</param>
public void setTempPrefs(PreferenceArray prefs, long anonymousUserID)
{
    this.tempPrefs[anonymousUserID] = prefs;

    FastIDSet coveredItems = new FastIDSet();
    int position = 0;
    while (position < prefs.length())
    {
        coveredItems.add(prefs.getItemID(position));
        position++;
    }

    this.prefItemIDs[anonymousUserID] = coveredItems;
}
/// <summary>
/// Computes the similarity of two users from the sizes of their item sets and of the
/// intersection, delegating the actual formula to <c>doSimilarity</c>.
/// </summary>
/// <param name="userID1">ID of the first user.</param>
/// <param name="userID2">ID of the second user.</param>
/// <returns>The value returned by <c>doSimilarity</c>.</returns>
public double userSimilarity(long userID1, long userID2)
{
    DataModel dataModel = base.getDataModel();
    FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
    FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);
    int firstCount = firstItems.size();
    int secondCount = secondItems.size();

    // The larger set is the receiver, the smaller one the argument.
    int shared;
    if (firstCount < secondCount)
    {
        shared = secondItems.intersectionSize(firstItems);
    }
    else
    {
        shared = firstItems.intersectionSize(secondItems);
    }

    return doSimilarity(firstCount, secondCount, shared);
}
/// <summary>
/// For each preferred item, visits every user with a preference for it and collects all of that
/// user's item IDs; the preferred items themselves are removed at the end.
/// </summary>
/// <param name="preferredItemIDs">Items whose co-preferring users are examined.</param>
/// <param name="dataModel">Source of item preferences and per-user item sets.</param>
/// <returns>Items held by those users, minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
{
    FastIDSet candidates = new FastIDSet();

    for (int p = 0; p < preferredItemIDs.Length; p++)
    {
        IPreferenceArray prefsForItem = dataModel.GetPreferencesForItem(preferredItemIDs[p]);
        int userCount = prefsForItem.Length();

        int u = 0;
        while (u < userCount)
        {
            long preferringUser = prefsForItem.GetUserID(u);
            candidates.AddAll(dataModel.GetItemIDsFromUser(preferringUser));
            u++;
        }
    }

    candidates.RemoveAll(preferredItemIDs);
    return candidates;
}
/// <summary>
/// Produces the set of all item IDs known to the data model, minus the preferred ones.
/// </summary>
/// <param name="preferredItemIDs">Items to exclude from the result.</param>
/// <param name="dataModel">Model supplying the item count and all item IDs.</param>
/// <returns>All item IDs of <paramref name="dataModel"/> minus <paramref name="preferredItemIDs"/>.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel)
{
    // The item count is passed as the set's initial size argument.
    FastIDSet unknownItems = new FastIDSet(dataModel.GetNumItems());

    for (var cursor = dataModel.GetItemIDs(); cursor.MoveNext();)
    {
        unknownItems.Add(cursor.Current);
    }

    unknownItems.RemoveAll(preferredItemIDs);
    return unknownItems;
}
/// <summary>
/// Builds a new set holding the item ID of each of the user's preferences.
/// </summary>
/// <param name="userID">User whose preferences are read.</param>
/// <returns>A freshly created set of that user's item IDs.</returns>
public override FastIDSet GetItemIDsFromUser(long userID)
{
    IPreferenceArray userPrefs = GetPreferencesFromUser(userID);
    int prefCount = userPrefs.Length();

    FastIDSet itemIDs = new FastIDSet(prefCount);
    int position = 0;
    while (position < prefCount)
    {
        itemIDs.Add(userPrefs.GetItemID(position));
        position++;
    }

    return itemIDs;
}
/// <summary>
/// Gathers the item IDs of every user in the neighborhood and removes the items of
/// <paramref name="theUserID"/>.
/// </summary>
/// <param name="theNeighborhood">IDs of the neighboring users.</param>
/// <param name="theUserID">User whose own items are excluded.</param>
/// <returns>Items held by the neighbors but not by the user.</returns>
protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID)
{
    DataModel dataModel = this.getDataModel();
    FastIDSet gathered = new FastIDSet();

    for (int n = 0; n < theNeighborhood.Length; n++)
    {
        FastIDSet neighborItems = dataModel.getItemIDsFromUser(theNeighborhood[n]);
        gathered.addAll(neighborItems);
    }

    FastIDSet ownItems = dataModel.getItemIDsFromUser(theUserID);
    gathered.removeAll(ownItems);
    return gathered;
}
/// <summary>
/// Builds a new set holding the item ID of each of the user's preferences.
/// </summary>
/// <param name="userID">User whose preferences are read.</param>
/// <returns>A freshly created set of that user's item IDs.</returns>
public override FastIDSet getItemIDsFromUser(long userID)
{
    PreferenceArray userPrefs = this.getPreferencesFromUser(userID);
    int prefCount = userPrefs.length();

    FastIDSet itemIDs = new FastIDSet(prefCount);
    int position = 0;
    while (position < prefCount)
    {
        itemIDs.add(userPrefs.getItemID(position));
        position++;
    }

    return itemIDs;
}
/// <summary>
/// Picks up to <paramref name="at"/> items of the user whose preference value is at least
/// <paramref name="relevanceThreshold"/>, walking the preferences in the order produced by
/// <c>SortByValueReversed</c>.
/// </summary>
/// <param name="userID">User whose preferences are examined.</param>
/// <param name="at">Maximum number of item IDs to return.</param>
/// <param name="relevanceThreshold">Minimum preference value for an item to qualify.</param>
/// <param name="dataModel">Source of the user's preferences.</param>
/// <returns>The selected item IDs.</returns>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel)
{
    IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
    FastIDSet relevant = new FastIDSet(at);
    userPrefs.SortByValueReversed();

    int position = 0;
    while (position < userPrefs.Length() && relevant.Count() < at)
    {
        bool qualifies = userPrefs.GetValue(position) >= relevanceThreshold;
        if (qualifies)
        {
            relevant.Add(userPrefs.GetItemID(position));
        }
        position++;
    }

    return relevant;
}
/// <summary>
/// Builds a <c>GenericBooleanPrefDataModel</c> from a boolean matrix: row <c>i</c> belongs to
/// <c>userIDs[i]</c>, and each column index set to <c>true</c> becomes an item ID of that user.
/// Users without any <c>true</c> entry are left out.
/// </summary>
/// <param name="userIDs">User IDs, one per row of <paramref name="prefs"/>.</param>
/// <param name="prefs">Per-user flags; the column index is used as the item ID.</param>
/// <returns>The resulting boolean-preference data model.</returns>
public static IDataModel getBooleanDataModel(long[] userIDs, bool[][] prefs)
{
    FastByIDMap<FastIDSet> itemsByUser = new FastByIDMap<FastIDSet>();

    for (int row = 0; row < userIDs.Length; row++)
    {
        bool[] flags = prefs[row];
        FastIDSet items = new FastIDSet();
        for (int column = 0; column < flags.Length; column++)
        {
            if (flags[column])
            {
                items.Add(column);
            }
        }

        if (items.IsEmpty())
        {
            continue;
        }
        itemsByUser.Put(userIDs[row], items);
    }

    return new GenericBooleanPrefDataModel(itemsByUser);
}
/// <summary>
/// Returns the IDs of all other (possibly sampled) users whose similarity to
/// <paramref name="userID"/> is not NaN and is at least <c>threshold</c>.
/// </summary>
/// <param name="userID">User whose neighborhood is computed.</param>
/// <returns>IDs of the qualifying users, as an array.</returns>
public override long[] GetUserNeighborhood(long userID)
{
    IDataModel model = getDataModel();
    FastIDSet neighbors = new FastIDSet();
    var userCursor = SamplinglongPrimitiveIterator.MaybeWrapIterator(model.GetUserIDs(), getSamplingRate());
    IUserSimilarity similarityImpl = getUserSimilarity();

    while (userCursor.MoveNext())
    {
        long candidateID = userCursor.Current;
        if (candidateID == userID)
        {
            // A user is never their own neighbor.
            continue;
        }

        double score = similarityImpl.UserSimilarity(userID, candidateID);
        if (Double.IsNaN(score) || !(score >= threshold))
        {
            continue;
        }
        neighbors.Add(candidateID);
    }

    return neighbors.ToArray();
}
/// <summary>
/// With candidate items 5 and 3 supplied by the mocked strategy, <c>SVDRecommender.Recommend</c>
/// must return item 3 (value 2.0) ahead of item 5 (value 1.0) and must consult the
/// factorization mock twice for item features and twice for user features.
/// </summary>
public void recommend()
{
    // Arrange: collaborators.
    var modelMock = new DynamicMock(typeof(IDataModel));
    var userPrefsMock = new DynamicMock(typeof(IPreferenceArray));
    var strategyMock = new DynamicMock(typeof(ICandidateItemsStrategy));
    var factorizerMock = new DynamicMock(typeof(IFactorizer));
    var factorization = new Factorization_recommend_TestMock();

    FastIDSet candidateItems = new FastIDSet();
    candidateItems.Add(5L);
    candidateItems.Add(3L);

    // Arrange: expectations.
    factorizerMock.ExpectAndReturn("Factorize", factorization);
    modelMock.ExpectAndReturn("GetPreferencesFromUser", userPrefsMock.MockInstance, (1L));
    strategyMock.ExpectAndReturn("GetCandidateItems", candidateItems, 1L, userPrefsMock.MockInstance, modelMock.MockInstance);

    SVDRecommender svdRecommender = new SVDRecommender(
        (IDataModel)modelMock.MockInstance,
        (IFactorizer)factorizerMock.MockInstance,
        (ICandidateItemsStrategy)strategyMock.MockInstance);

    // Act
    IList<IRecommendedItem> recommended = svdRecommender.Recommend(1L, 5);

    // Assert: ordering and values.
    Assert.AreEqual(2, recommended.Count);
    IRecommendedItem top = recommended[0];
    Assert.AreEqual(3L, top.GetItemID());
    Assert.AreEqual(2.0f, top.GetValue(), EPSILON);
    IRecommendedItem runnerUp = recommended[1];
    Assert.AreEqual(5L, runnerUp.GetItemID());
    Assert.AreEqual(1.0f, runnerUp.GetValue(), EPSILON);

    // Assert: collaborator interactions.
    modelMock.Verify();
    strategyMock.Verify();
    factorizerMock.Verify();
    Assert.AreEqual(2, factorization.getItemFeaturesCallCount);
    Assert.AreEqual(2, factorization.getUserFeaturesCallCount);
}
/// <summary>
/// Creates a new <see cref="GenericBooleanPrefDataModel"/> from the given users and the item IDs
/// each of them has. The given maps are stored by reference; an item-to-users index and sorted
/// item ID and user ID arrays are derived from <paramref name="userData"/>.
/// </summary>
/// <param name="userData">Users to include: item ID sets keyed by user ID.</param>
/// <param name="timestamps">Optional timestamps of preferences: user IDs mapped to maps of item ID to <see cref="DateTime"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="userData"/> is null.</exception>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    // Fail fast with a descriptive error instead of a NullReferenceException further down.
    if (userData == null)
    {
        throw new ArgumentNullException("userData", "userData is null");
    }

    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();
    FastIDSet itemIDSet = new FastIDSet();

    foreach (var entry in preferenceFromUsers.EntrySet())
    {
        long userID = entry.Key;
        FastIDSet itemIDs1 = entry.Value;
        itemIDSet.AddAll(itemIDs1);

        // Invert the mapping: register this user under each of their items.
        var it = itemIDs1.GetEnumerator();
        while (it.MoveNext())
        {
            long itemID = it.Current;
            FastIDSet userIDs1 = preferenceForItems.Get(itemID);
            if (userIDs1 == null)
            {
                userIDs1 = new FastIDSet(2);
                preferenceForItems.Put(itemID, userIDs1);
            }
            userIDs1.Add(userID);
        }
    }

    this.itemIDs = itemIDSet.ToArray();
    itemIDSet = null; // Might help GC -- this is big
    Array.Sort(itemIDs);

    this.userIDs = new long[userData.Count()];
    int i = 0;
    var it1 = userData.Keys.GetEnumerator();
    while (it1.MoveNext())
    {
        userIDs[i++] = it1.Current;
    }
    Array.Sort(userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Adds the preferences of <paramref name="otherUserID"/> to the training data. When that user
/// is the one under evaluation, preferences on the relevant (held-out) items are excluded.
/// </summary>
/// <param name="userID">User currently being evaluated.</param>
/// <param name="relevantItemIDs">Item IDs that must not appear in that user's training data.</param>
/// <param name="trainingUsers">Map receiving the training preferences, keyed by user ID.</param>
/// <param name="otherUserID">User whose preferences are being added.</param>
/// <param name="dataModel">Source of the preferences.</param>
public void ProcessOtherUser(long userID, FastIDSet relevantItemIDs, FastByIDMap<IPreferenceArray> trainingUsers, long otherUserID, IDataModel dataModel)
{
    IPreferenceArray sourcePrefs = dataModel.GetPreferencesFromUser(otherUserID);
    bool isEvaluatedUser = userID == otherUserID;

    if (!isEvaluatedUser)
    {
        // Other users keep all of their preferences.
        trainingUsers.Put(otherUserID, sourcePrefs);
        return;
    }

    // Drop the test items, i.e. the "relevant" item IDs.
    List<IPreference> trainingPrefs = new List<IPreference>(sourcePrefs.Length());
    foreach (IPreference pref in sourcePrefs)
    {
        bool heldOut = relevantItemIDs.Contains(pref.GetItemID());
        if (!heldOut)
        {
            trainingPrefs.Add(pref);
        }
    }

    // Nothing left to train on: the user is not added.
    if (trainingPrefs.Count == 0)
    {
        return;
    }
    trainingUsers.Put(otherUserID, new GenericUserPreferenceArray(trainingPrefs));
}
/// <summary>
/// Collects the item ID of each of the user's preferences into a new set.
/// </summary>
/// <param name="userID">User whose preferences are read.</param>
/// <returns>A freshly created set of that user's item IDs.</returns>
public override FastIDSet GetItemIDsFromUser(long userID)
{
    IPreferenceArray userPrefs = GetPreferencesFromUser(userID);
    int remaining = userPrefs.Length();

    // The preference count is passed as the set's initial size argument.
    FastIDSet itemIDs = new FastIDSet(remaining);
    for (int position = 0; remaining > 0; position++, remaining--)
    {
        long itemID = userPrefs.GetItemID(position);
        itemIDs.Add(itemID);
    }

    return itemIDs;
}
/// <summary>
/// Sets temporary preferences for a given anonymous user and records the set of item IDs those
/// preferences cover.
/// </summary>
/// <param name="prefs">Preferences to store.</param>
/// <param name="anonymousUserID">Key under which both entries are stored.</param>
public void SetTempPrefs(IPreferenceArray prefs, long anonymousUserID)
{
    this.tempPrefs[anonymousUserID] = prefs;

    FastIDSet coveredItems = new FastIDSet();
    int position = 0;
    while (position < prefs.Length())
    {
        coveredItems.Add(prefs.GetItemID(position));
        position++;
    }

    this.prefItemIDs[anonymousUserID] = coveredItems;
}
/// <summary>
/// Checks that adding <c>long.MinValue</c> or <c>long.MaxValue</c> to a <c>FastIDSet</c>
/// throws an <c>ArgumentException</c> and that the value is not contained afterwards.
/// </summary>
public void testReservedValues() {
    FastIDSet set = new FastIDSet();
    long[] reservedValues = { long.MinValue, long.MaxValue };

    foreach (long reserved in reservedValues) {
        try {
            set.Add(reserved);
            Assert.Fail("Should have thrown IllegalArgumentException");
        } catch (ArgumentException) {
            // expected: the value is rejected
        }
        Assert.False(set.Contains(reserved));
    }
}
/// <summary>
/// Adds the item IDs of the first <paramref name="max"/> preferences (or of all of them, if there
/// are fewer) to <paramref name="modelSet"/>.
/// </summary>
/// <param name="modelSet">set that receives the item IDs</param>
/// <param name="prefs">preferences to read item IDs from, in index order</param>
/// <param name="max">upper bound on the number of preferences considered</param>
/// <returns>the largest item ID added, or -1 when nothing was added</returns>
private static long setBits(FastIDSet modelSet, IPreferenceArray prefs, int max) {
    long highestItemID = -1;
    int limit = Math.Min(prefs.Length(), max);
    for (int idx = 0; idx < limit; idx++) {
        long currentID = prefs.GetItemID(idx);
        modelSet.Add(currentID);
        highestItemID = Math.Max(highestItemID, currentID);
    }
    return highestItemID;
}
/// <summary>
/// Builds the set of candidate item IDs for a user: for each preferred item, the items of the
/// users that have a preference for it are added (through <c>addSomeOf</c>), and finally the
/// preferred items themselves are removed from the result.
/// </summary>
/// <remarks>
/// When there are more than <c>maxItems</c> preferred items, they are iterated through a
/// <c>SamplinglongPrimitiveIterator</c> with rate <c>maxItems / preferredItemIDs.Length</c>.
/// When an item has more than <c>maxUsersPerItem</c> preferences, its preferences are iterated
/// through a <c>FixedSizeSamplingIterator</c> created with <c>maxUsersPerItem</c>.
/// </remarks>
/// <param name="preferredItemIDs">IDs of the items the user already has preferences for</param>
/// <param name="dataModel">model used to look up preferences per item and item IDs per user</param>
/// <returns>the collected candidate item IDs, without any of <paramref name="preferredItemIDs"/></returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
    IEnumerator<long> itemIterator = ((IEnumerable<long>)preferredItemIDs).GetEnumerator();
    int preferredCount = preferredItemIDs.Length;
    if (preferredCount > maxItems) {
        double samplingRate = (double) maxItems / preferredCount;
        log.Info("preferredItemIDs.Length {0}, samplingRate {1}", preferredCount, samplingRate);
        itemIterator = new SamplinglongPrimitiveIterator(itemIterator, samplingRate);
    }

    FastIDSet candidates = new FastIDSet();
    while (itemIterator.MoveNext()) {
        long preferredItemID = itemIterator.Current;
        IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(preferredItemID);
        int userCount = itemPrefs.Length();

        if (userCount <= maxUsersPerItem) {
            // Few enough users: visit every one of them.
            for (int u = 0; u < userCount; u++) {
                addSomeOf(candidates, dataModel.GetItemIDsFromUser(itemPrefs.GetUserID(u)));
            }
            continue;
        }

        // Too many users for this item: go through the sampling iterator instead.
        var sampledUsers = new FixedSizeSamplingIterator<IPreference>(maxUsersPerItem, itemPrefs.GetEnumerator());
        while (sampledUsers.MoveNext()) {
            long sampledUserID = sampledUsers.Current.GetUserID();
            addSomeOf(candidates, dataModel.GetItemIDsFromUser(sampledUserID));
        }
    }

    candidates.RemoveAll(preferredItemIDs);
    return candidates;
}
/// <summary>
/// Runs 1000000 randomly chosen contains/add/remove operations against both a <c>FastIDSet</c>
/// and a <c>HashSet&lt;int&gt;</c>, asserting that each operation returns the same result and that
/// size and emptiness agree after every add or remove.
/// </summary>
public void testVersusHashSet() {
    FastIDSet actual = new FastIDSet(1);
    var expected = new HashSet<int>();
    var random = RandomUtils.getRandom();

    for (int round = 0; round < 1000000; round++) {
        // Draw order matters: first the operation selector, then the key.
        double choice = random.nextDouble();
        var key = random.nextInt(100);

        if (choice < 0.4) {
            // Lookup only; the sets are not modified, so no size check follows.
            Assert.AreEqual(expected.Contains(key), actual.Contains(key));
            continue;
        }

        if (choice < 0.7) {
            Assert.AreEqual(expected.Add(key), actual.Add(key));
        } else {
            Assert.AreEqual(expected.Remove(key), actual.Remove(key));
        }
        Assert.AreEqual(expected.Count, actual.Count());
        Assert.AreEqual(expected.Count == 0, actual.IsEmpty());
    }
}
/// <summary>
/// Creates a <c>FastIDSet</c> to which the IDs 1, 2 and 3 have been added, in that order.
/// </summary>
/// <returns>the newly created set</returns>
private static FastIDSet buildTestFastSet() {
    FastIDSet fixture = new FastIDSet();
    for (long id = 1; id <= 3; id++) {
        fixture.Add(id);
    }
    return fixture;
}
/// <summary>
/// Creates the model around the given delegate and starts with an empty set of
/// preference item IDs.
/// </summary>
/// <param name="deleg">data model stored as this instance's delegate</param>
public PlusAnonymousUserDataModel(IDataModel deleg) {
    prefItemIDs = new FastIDSet();
    _delegate = deleg;
}
/// <summary>
/// Adds the item IDs of the first <paramref name="max"/> recommended items (or of all of them, if
/// there are fewer) to <paramref name="modelSet"/>.
/// </summary>
/// <param name="modelSet">set that receives the item IDs</param>
/// <param name="items">recommended items to read item IDs from, in list order</param>
/// <param name="max">upper bound on the number of items considered</param>
/// <returns>the largest item ID added, or -1 when nothing was added</returns>
private static long setBits(FastIDSet modelSet, IList<IRecommendedItem> items, int max) {
    long highestItemID = -1;
    int limit = Math.Min(items.Count, max);
    for (int idx = 0; idx < limit; idx++) {
        long currentID = items[idx].GetItemID();
        modelSet.Add(currentID);
        highestItemID = Math.Max(highestItemID, currentID);
    }
    return highestItemID;
}
/// <summary>
/// Adds item IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>. When the
/// source holds at most <c>maxItemsPerUser</c> IDs, all of them are added; otherwise only the IDs
/// produced by a <c>SamplinglongPrimitiveIterator</c> with rate <c>maxItemsPerUser / count</c>.
/// </summary>
/// <param name="possibleItemIDs">set that receives the item IDs</param>
/// <param name="itemIDs">item IDs to take from</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
    var available = itemIDs.Count();
    if (available <= maxItemsPerUser) {
        possibleItemIDs.AddAll(itemIDs);
        return;
    }

    double samplingRate = (double) maxItemsPerUser / available;
    var sampled = new SamplinglongPrimitiveIterator(itemIDs.GetEnumerator(), samplingRate);
    while (sampled.MoveNext()) {
        possibleItemIDs.Add(sampled.Current);
    }
}
/// <summary>
/// Checks <c>Count()</c> and <c>IsEmpty()</c> of a <c>FastIDSet</c> when it is new, after one
/// element has been added, and after that element has been removed again.
/// </summary>
public void testSizeEmpty() {
    const long id = 1;
    FastIDSet ids = new FastIDSet();

    // Freshly created.
    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());

    // One element in.
    ids.Add(id);
    Assert.AreEqual(1, ids.Count());
    Assert.False(ids.IsEmpty());

    // Element taken out again.
    ids.Remove(id);
    Assert.AreEqual(0, ids.Count());
    Assert.True(ids.IsEmpty());
}
/// <summary>
/// Converts a map of preference arrays into a map of item-ID sets: every key of
/// <paramref name="data"/> is mapped to a new <c>FastIDSet</c> holding the item IDs of its
/// preference array.
/// </summary>
/// <param name="data">preference arrays keyed by ID</param>
/// <returns>a new map with the same keys and the item IDs of each preference array as values</returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<IPreferenceArray> data) {
    var itemsByKey = new FastByIDMap<FastIDSet>(data.Count());
    foreach (var mapping in data.EntrySet()) {
        IPreferenceArray sourcePrefs = mapping.Value;
        int prefCount = sourcePrefs.Length();

        FastIDSet collected = new FastIDSet(prefCount);
        int idx = 0;
        while (idx < prefCount) {
            collected.Add(sourcePrefs.GetItemID(idx));
            idx++;
        }

        itemsByKey.Put(mapping.Key, collected);
    }
    return itemsByKey;
}