/// <summary>
/// Parses a single line of diff data and records it in <c>averageDiffs</c>.
/// Expected token layout after splitting with <c>SEPARATOR</c>:
/// <c>itemID1, itemID2, diff[, count[, mk, sk]]</c>. That is 3, 4, or at least 6 tokens.
/// Empty lines and lines starting with <c>COMMENT_CHAR</c> are ignored.
/// </summary>
/// <param name="line">Raw input line; may be null or empty.</param>
/// <param name="averageCount">Number of averages stored so far.</param>
/// <returns>
/// <paramref name="averageCount"/>, incremented by one when a new average was stored.
/// </returns>
/// <exception cref="ArgumentException">
/// The line has fewer than 3 tokens or exactly 5 tokens.
/// </exception>
/// <exception cref="Exception">
/// An average for the same (ordered) item pair is already present.
/// </exception>
private long processLine(String line, long averageCount) {
    if (string.IsNullOrEmpty(line) || line[0] == COMMENT_CHAR) {
        return averageCount;
    }

    String[] tokens = SEPARATOR.Split(line);
    // Re-instates the shape check that had been commented out. Without it a
    // 5-token line passed the "has mk/sk" test and then read tokens[5] out of
    // range, and a line with fewer than 3 tokens failed the same way.
    if (tokens.Length < 3 || tokens.Length == 5) {
        throw new ArgumentException("Bad line: " + line);
    }

    long itemID1 = long.Parse(tokens[0]);
    long itemID2 = long.Parse(tokens[1]);
    double diff = double.Parse(tokens[2]);
    int count = tokens.Length >= 4 ? int.Parse(tokens[3]) : 1;
    // Both tokens[4] and tokens[5] are read below, so six tokens are required.
    bool hasMkSk = tokens.Length >= 6;

    // Store every pair under its smaller ID so (a, b) and (b, a) share one entry.
    if (itemID1 > itemID2) {
        long temp = itemID1;
        itemID1 = itemID2;
        itemID2 = temp;
    }

    FastByIDMap<RunningAverage> level1Map = averageDiffs.get(itemID1);
    if (level1Map == null) {
        level1Map = new FastByIDMap<RunningAverage>();
        averageDiffs.put(itemID1, level1Map);
    }

    RunningAverage average = level1Map.get(itemID2);
    if (average != null) {
        throw new Exception("Duplicated line for item-item pair " + itemID1 + " / " + itemID2);
    }

    // Once maxEntries averages are stored, further pairs are not stored,
    // but their item IDs are still registered as recommendable below.
    if (averageCount < maxEntries) {
        if (hasMkSk) {
            double mk = double.Parse(tokens[4]);
            double sk = double.Parse(tokens[5]);
            average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
        } else {
            average = new FullRunningAverage(count, diff);
        }
        level1Map.put(itemID2, average);
        averageCount++;
    }

    allRecommendableItemIDs.add(itemID1);
    allRecommendableItemIDs.add(itemID2);

    return averageCount;
}
/// <summary>
/// Rebuilds <c>topRecsByUserID</c>, <c>clustersByUserID</c> and <c>allClusters</c>
/// from the current data model. With no users all three are reset to empty values.
/// </summary>
private void buildClusters() {
    DataModel dataModel = getDataModel();
    int userCount = dataModel.getNumUsers();

    // Nothing to cluster: reset state and leave.
    if (!(userCount > 0)) {
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        allClusters = NO_CLUSTERS;
        return;
    }

    // Seed the clustering with one single-member cluster per user.
    List<FastIDSet> clusters = new List<FastIDSet>();
    var userIDs = dataModel.getUserIDs();
    while (userIDs.MoveNext()) {
        FastIDSet singleton = new FastIDSet();
        singleton.add(userIDs.Current);
        clusters.Add(singleton);
    }

    // A single user is already its own (only) cluster.
    if (userCount > 1) {
        findClusters(clusters);
    }

    topRecsByUserID = computeTopRecsPerUserID(clusters);
    clustersByUserID = computeClustersPerUserID(clusters);
    allClusters = clusters.ToArray();
}
/// <summary>
/// Builds the model from per-user preference arrays. While scanning the users
/// it collects the set of item IDs, groups preferences by item, and tracks the
/// smallest and largest preference value seen.
/// </summary>
/// <param name="userData">Preferences keyed by user ID; stored as <c>preferenceFromUsers</c>.</param>
/// <param name="timestamps">Timestamp data, stored unchanged in <c>timestamps</c>.</param>
public GenericDataModel(FastByIDMap<PreferenceArray> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps) {
    this.preferenceFromUsers = userData;

    FastByIDMap<List<Preference>> prefsByItem = new FastByIDMap<List<Preference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int usersProcessed = 0;
    // Running extremes start at the opposite infinity so the first value replaces them.
    float maxPrefValue = float.NegativeInfinity;
    float minPrefValue = float.PositiveInfinity;

    foreach (KeyValuePair<long, PreferenceArray> userEntry in this.preferenceFromUsers.entrySet()) {
        PreferenceArray userPrefs = userEntry.Value;
        userPrefs.sortByItem();

        foreach (Preference pref in userPrefs) {
            long itemID = pref.getItemID();
            itemIDSet.add(itemID);

            List<Preference> itemPrefs = prefsByItem.get(itemID);
            if (itemPrefs == null) {
                itemPrefs = new List<Preference>(2);
                prefsByItem.put(itemID, itemPrefs);
            }
            itemPrefs.Add(pref);

            float prefValue = pref.getValue();
            if (prefValue > maxPrefValue) {
                maxPrefValue = prefValue;
            }
            if (prefValue < minPrefValue) {
                minPrefValue = prefValue;
            }
        }

        // Progress message every 10000 users.
        usersProcessed++;
        if ((usersProcessed % 10000) == 0) {
            log.info("Processed {0} users", new object[] { usersProcessed });
        }
    }
    log.info("Processed {0} users", new object[] { usersProcessed });

    this.setMinPreference(minPrefValue);
    this.setMaxPreference(maxPrefValue);

    this.itemIDs = itemIDSet.toArray();
    Array.Sort<long>(this.itemIDs);

    this.preferenceForItems = toDataMap(prefsByItem, false);
    foreach (KeyValuePair<long, PreferenceArray> itemEntry in this.preferenceForItems.entrySet()) {
        itemEntry.Value.sortByUser();
    }

    // Copy the user IDs into a sorted array.
    this.userIDs = new long[userData.size()];
    int nextSlot = 0;
    foreach (long userID in userData.Keys) {
        this.userIDs[nextSlot] = userID;
        nextSlot++;
    }
    Array.Sort<long>(this.userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Rebuilds <c>topRecsByUserID</c>, <c>clustersByUserID</c> and <c>allClusters</c>
/// from the current data model by repeatedly calling <c>mergeClosestClusters</c>
/// until it reports completion.
/// </summary>
private void buildClusters() {
    DataModel model = getDataModel();
    int numUsers = model.getNumUsers();

    if (numUsers == 0) {
        topRecsByUserID = new FastByIDMap<List<RecommendedItem>>();
        clustersByUserID = new FastByIDMap<FastIDSet>();
        // Reset the cluster array too; previously it kept whatever an earlier
        // build had produced (or stayed unassigned) when the model had no users.
        allClusters = new FastIDSet[0];
        return;
    }

    List<FastIDSet> clusters = new List<FastIDSet>();
    // Begin with a cluster for each user:
    var it = model.getUserIDs();
    while (it.MoveNext()) {
        FastIDSet newCluster = new FastIDSet();
        newCluster.add(it.Current);
        clusters.Add(newCluster);
    }

    // The previous "done" value is passed back in on every call.
    bool done = false;
    while (!done) {
        done = mergeClosestClusters(numUsers, clusters, done);
    }

    topRecsByUserID = computeTopRecsPerUserID(clusters);
    clustersByUserID = computeClustersPerUserID(clusters);
    allClusters = clusters.ToArray();
}
/// <summary>
/// Replaces the contents of <c>allRecommendableItemIDs</c> with every item ID
/// that appears in <c>averageDiffs</c>, either as an outer key or as a key of
/// one of the nested maps.
/// </summary>
private void updateAllRecommendableItems() {
    FastIDSet collected = new FastIDSet(dataModel.getNumItems());

    foreach (var outerEntry in averageDiffs.entrySet()) {
        collected.add(outerEntry.Key);
        foreach (var innerID in outerEntry.Value.Keys) {
            collected.add(innerID);
        }
    }

    // Swap in the freshly collected IDs, then rehash the set.
    allRecommendableItemIDs.clear();
    allRecommendableItemIDs.addAll(collected);
    allRecommendableItemIDs.rehash();
}
/// <summary>
/// Stores temporary preferences for an anonymous user, together with the set
/// of item IDs those preferences refer to.
/// </summary>
/// <param name="prefs">Preferences to register.</param>
/// <param name="anonymousUserID">Key under which both entries are stored.</param>
public void setTempPrefs(PreferenceArray prefs, long anonymousUserID) {
    this.tempPrefs[anonymousUserID] = prefs;

    FastIDSet itemIDs = new FastIDSet();
    int index = 0;
    while (index < prefs.length()) {
        itemIDs.add(prefs.getItemID(index));
        index++;
    }

    this.prefItemIDs[anonymousUserID] = itemIDs;
}
/// <summary>
/// Returns the IDs of all items for which the given user has a preference.
/// </summary>
/// <param name="userID">User whose preferences are read.</param>
/// <returns>A new set sized to the number of preferences.</returns>
public override FastIDSet getItemIDsFromUser(long userID) {
    PreferenceArray prefs = this.getPreferencesFromUser(userID);
    int prefCount = prefs.length();

    FastIDSet itemIDs = new FastIDSet(prefCount);
    int index = 0;
    while (index < prefCount) {
        itemIDs.add(prefs.getItemID(index));
        index++;
    }
    return itemIDs;
}
/// <summary>
/// Returns every item ID in the data model except the ones already preferred.
/// </summary>
/// <param name="preferredItemIDs">Item IDs to exclude from the result.</param>
/// <param name="dataModel">Source of the full item ID list.</param>
/// <returns>A new set of candidate item IDs.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) {
    FastIDSet candidates = new FastIDSet(dataModel.getNumItems());

    for (IEnumerator<long> allItems = dataModel.getItemIDs(); allItems.MoveNext();) {
        candidates.add(allItems.Current);
    }

    candidates.removeAll(preferredItemIDs);
    return candidates;
}
/// <summary>
/// Collects up to <paramref name="at"/> item IDs from the user's preferences
/// whose value is at least <paramref name="relevanceThreshold"/>.
/// The preference array is re-ordered in place via <c>sortByValueReversed()</c>
/// before it is scanned from the start.
/// </summary>
/// <param name="userID">User whose preferences are examined.</param>
/// <param name="at">Maximum number of item IDs to return.</param>
/// <param name="relevanceThreshold">Minimum preference value to qualify.</param>
/// <param name="dataModel">Source of the user's preferences.</param>
/// <returns>A new set holding the qualifying item IDs.</returns>
public FastIDSet getRelevantItemsIDs(long userID, int at, double relevanceThreshold, DataModel dataModel) {
    PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
    FastIDSet relevant = new FastIDSet(at);
    prefs.sortByValueReversed();

    int index = 0;
    while ((index < prefs.length()) && (relevant.size() < at)) {
        if (prefs.getValue(index) >= relevanceThreshold) {
            relevant.add(prefs.getItemID(index));
        }
        index++;
    }
    return relevant;
}
/// <summary>
/// Adds the item IDs of the first <paramref name="max"/> preferences to
/// <paramref name="modelSet"/> and reports the largest ID added.
/// </summary>
/// <param name="modelSet">Set that receives the item IDs.</param>
/// <param name="prefs">Preferences to read item IDs from.</param>
/// <param name="max">Upper bound on the number of preferences consumed.</param>
/// <returns>The largest item ID added, or -1 if none exceeded -1.</returns>
private static long setBits(FastIDSet modelSet, PreferenceArray prefs, int max) {
    long largestID = -1L;
    int index = 0;
    while ((index < prefs.length()) && (index < max)) {
        long itemID = prefs.getItemID(index);
        modelSet.add(itemID);
        if (itemID > largestID) {
            largestID = itemID;
        }
        index++;
    }
    return largestID;
}
/// <summary>
/// Adds the item IDs of the first <paramref name="max"/> recommended items to
/// <paramref name="modelSet"/> and reports the largest ID added.
/// </summary>
/// <param name="modelSet">Set that receives the item IDs.</param>
/// <param name="items">Recommended items to read item IDs from.</param>
/// <param name="max">Upper bound on the number of items consumed.</param>
/// <returns>The largest item ID added, or -1 if none exceeded -1.</returns>
private static long setBits(FastIDSet modelSet, List<RecommendedItem> items, int max) {
    long largestID = -1L;
    int index = 0;
    while ((index < items.Count) && (index < max)) {
        long itemID = items[index].getItemID();
        modelSet.add(itemID);
        if (itemID > largestID) {
            largestID = itemID;
        }
        index++;
    }
    return largestID;
}
/// <summary>
/// Adds item IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>.
/// When the source holds more than <c>maxItemsPerUser</c> IDs, only a sample is
/// added, drawn through a <c>SamplingLongPrimitiveIterator</c> with rate
/// <c>maxItemsPerUser / itemIDs.size()</c>. Otherwise all IDs are added.
/// </summary>
/// <param name="possibleItemIDs">Destination set.</param>
/// <param name="itemIDs">Source set.</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
    // Small enough: take everything.
    if (!(itemIDs.size() > this.maxItemsPerUser)) {
        possibleItemIDs.addAll(itemIDs);
        return;
    }

    var source = itemIDs.GetEnumerator();
    double samplingRate = ((double)this.maxItemsPerUser) / ((double)itemIDs.size());
    SamplingLongPrimitiveIterator sampled = new SamplingLongPrimitiveIterator(source, samplingRate);
    while (sampled.MoveNext()) {
        possibleItemIDs.add(sampled.Current);
    }
}
/// <summary>
/// Returns the IDs of all items in the data model whose similarity to
/// <paramref name="itemID"/>, as computed by <c>itemSimilarity</c>, is not NaN.
/// </summary>
/// <param name="itemID">Item to compare every other item against.</param>
/// <returns>Array of item IDs with a defined similarity.</returns>
public virtual long[] allSimilarItemIDs(long itemID) {
    FastIDSet similar = new FastIDSet();

    IEnumerator<long> candidates = this.dataModel.getItemIDs();
    while (candidates.MoveNext()) {
        long candidate = candidates.Current;
        double similarity = this.itemSimilarity(itemID, candidate);
        if (double.IsNaN(similarity)) {
            continue;
        }
        similar.add(candidate);
    }

    return similar.toArray();
}
/// <summary>
/// Ranks the user's other preferred items with a <c>RecommendedBecauseEstimator</c>
/// built for (<paramref name="userID"/>, <paramref name="itemID"/>) and returns
/// the top <paramref name="howMany"/>.
/// </summary>
/// <param name="userID">User whose preferences supply the candidates.</param>
/// <param name="itemID">Item being explained; excluded from the candidates.</param>
/// <param name="howMany">Maximum number of items requested from <c>TopItems</c>.</param>
/// <returns>The list produced by <c>TopItems.getTopItems</c>.</returns>
public List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) {
    DataModel dataModel = this.getDataModel();
    TopItems.Estimator<long> estimator = new RecommendedBecauseEstimator(this, userID, itemID);

    PreferenceArray userPrefs = dataModel.getPreferencesFromUser(userID);
    int prefCount = userPrefs.length();

    // Candidates are every item the user has a preference for, minus the item itself.
    FastIDSet candidates = new FastIDSet(prefCount);
    int index = 0;
    while (index < prefCount) {
        candidates.add(userPrefs.getItemID(index));
        index++;
    }
    candidates.remove(itemID);

    return TopItems.getTopItems(howMany, candidates.GetEnumerator(), null, estimator);
}
/// <summary>
/// Converts a map of preference arrays into a map of item-ID sets, keeping the
/// same keys. Each set contains the item IDs of the corresponding array.
/// </summary>
/// <param name="data">Preference arrays keyed by ID.</param>
/// <returns>A new map from the same keys to sets of item IDs.</returns>
public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<PreferenceArray> data) {
    FastByIDMap<FastIDSet> result = new FastByIDMap<FastIDSet>(data.size());

    foreach (KeyValuePair<long, PreferenceArray> entry in data.entrySet()) {
        PreferenceArray prefs = entry.Value;
        int prefCount = prefs.length();

        FastIDSet itemIDs = new FastIDSet(prefCount);
        int index = 0;
        while (index < prefCount) {
            itemIDs.add(prefs.getItemID(index));
            index++;
        }

        result.put(entry.Key, itemIDs);
    }

    return result;
}
/// <summary>
/// Returns the IDs of all other users whose similarity to <paramref name="userID"/>
/// is defined (not NaN) and not below <c>threshold</c>. The user IDs are obtained
/// through <c>SamplingLongPrimitiveIterator.maybeWrapIterator</c> with the
/// configured sampling rate.
/// </summary>
/// <param name="userID">User whose neighborhood is computed.</param>
/// <returns>Array of neighboring user IDs.</returns>
public override long[] getUserNeighborhood(long userID) {
    DataModel dataModel = this.getDataModel();
    FastIDSet neighbors = new FastIDSet();
    IEnumerator<long> userIDs = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), this.getSamplingRate());
    UserSimilarity similarityMeasure = this.getUserSimilarity();

    while (userIDs.MoveNext()) {
        long otherUserID = userIDs.Current;
        // A user is never its own neighbor.
        if (userID == otherUserID) {
            continue;
        }

        double similarity = similarityMeasure.userSimilarity(userID, otherUserID);
        if (double.IsNaN(similarity)) {
            continue;
        }
        if (similarity < this.threshold) {
            continue;
        }
        neighbors.add(otherUserID);
    }

    return neighbors.toArray();
}
/// <summary>
/// Estimates the user's preference for <paramref name="itemID"/> as a weighted
/// average of the user's preferences over a neighborhood. The neighborhood is the
/// items returned by <c>mostSimilarItems</c> plus <paramref name="itemID"/> itself,
/// and the weights come from <c>getInterpolations</c>.
/// </summary>
/// <param name="theUserID">User for whom the preference is estimated.</param>
/// <param name="preferencesFromUser">The user's existing preferences; their item IDs are the similarity candidates.</param>
/// <param name="itemID">Item whose preference is estimated.</param>
/// <returns>
/// The weighted average, or <c>float.NaN</c> when no similar items are found or
/// the accumulated weight is zero.
/// </returns>
protected override float doEstimatePreference(long theUserID, PreferenceArray preferencesFromUser, long itemID) {
    DataModel dataModel = getDataModel();

    // Candidates: every item the user has a preference for, except the target item.
    int size = preferencesFromUser.length();
    FastIDSet possibleItemIDs = new FastIDSet(size);
    for (int i = 0; i < size; i++) {
        possibleItemIDs.add(preferencesFromUser.getItemID(i));
    }
    possibleItemIDs.remove(itemID);

    List<RecommendedItem> mostSimilar = mostSimilarItems(itemID, possibleItemIDs.GetEnumerator(), neighborhoodSize, null);

    // Without similar items no weights are computed. Previously the code carried
    // on with a null weights array and a placeholder neighborhood of { -1 }, which
    // dereferenced null if a preference for item -1 happened to exist. The
    // non-failing outcome of that path was NaN, so return it directly.
    if (mostSimilar.Count == 0) {
        return float.NaN;
    }

    // Neighborhood layout: the similar items first, the target item last.
    long[] theNeighborhood = new long[mostSimilar.Count + 1];
    int nOffset = 0;
    foreach (RecommendedItem rec in mostSimilar) {
        theNeighborhood[nOffset++] = rec.getItemID();
    }
    theNeighborhood[mostSimilar.Count] = itemID;

    // Start from the users with a preference for the first neighborhood item, then
    // drop every user who lacks a preference for a later item or is the target user.
    List<long> usersRatedNeighborhood = new List<long>();
    for (int i = 0; i < theNeighborhood.Length; i++) {
        PreferenceArray usersNeighborhood = dataModel.getPreferencesForItem(theNeighborhood[i]);
        int size1 = usersRatedNeighborhood.Count == 0 ? usersNeighborhood.length() : usersRatedNeighborhood.Count;
        for (int j = 0; j < size1; j++) {
            if (i == 0) {
                usersRatedNeighborhood.Add(usersNeighborhood.getUserID(j));
            } else {
                // size1 was captured before removals, so re-check the live count.
                if (j >= usersRatedNeighborhood.Count) {
                    break;
                }
                long index = usersRatedNeighborhood[j];
                if (!usersNeighborhood.hasPrefWithUserID(index) || index == theUserID) {
                    // Remove by value; step back so the element shifted into slot j is examined next.
                    usersRatedNeighborhood.Remove(index);
                    j--;
                }
            }
        }
    }

    // weights is indexed in parallel with theNeighborhood below.
    // NOTE(review): assumes getInterpolations returns one weight per neighborhood entry - confirm.
    double[] weights = getInterpolations(itemID, theNeighborhood, usersRatedNeighborhood);

    int n = 0;
    double preference = 0.0;
    double totalSimilarity = 0.0;
    foreach (long jitem in theNeighborhood) {
        float? pref = dataModel.getPreferenceValue(theUserID, jitem);
        if (pref != null) {
            double weight = weights[n];
            preference += pref.Value * weight;
            totalSimilarity += weight;
        }
        n++;
    }

    return totalSimilarity == 0.0 ? float.NaN : (float)(preference / totalSimilarity);
}