/// <summary>
/// Repeatedly merges the nearest pair of clusters in <paramref name="newClusters"/>, in place.
/// </summary>
/// <remarks>
/// When <c>clusteringByThreshold</c> is set, merging continues while the similarity of the
/// nearest pair is at least <c>clusteringThreshold</c>. Otherwise merging continues while the
/// list holds more than <c>numClusters</c> entries. In both modes the loop also ends as soon as
/// <c>findNearestClusters</c> returns a pair with a null member.
/// </remarks>
/// <param name="newClusters">Working list of clusters; modified by this method.</param>
private void findClusters(List<FastIDSet> newClusters)
{
    bool byThreshold = clusteringByThreshold;

    while (byThreshold || newClusters.Count > numClusters)
    {
        KeyValuePair<FastIDSet, FastIDSet> closest = findNearestClusters(newClusters);
        FastIDSet left = closest.Key;
        FastIDSet right = closest.Value;
        if (left == null || right == null)
        {
            return;
        }

        // In threshold mode, stop once the best remaining pair is no longer similar enough.
        if (byThreshold && !(clusterSimilarity.getSimilarity(left, right) >= clusteringThreshold))
        {
            return;
        }

        // Replace the two clusters with their union.
        newClusters.Remove(left);
        newClusters.Remove(right);
        FastIDSet union = new FastIDSet(left.size() + right.size());
        union.addAll(left);
        union.addAll(right);
        newClusters.Add(union);
    }
}
/// <summary>
/// Builds the candidate set from the items that <c>similarity.allSimilarItemIDs</c> reports for
/// each preferred item, then removes the preferred items themselves.
/// </summary>
/// <param name="preferredItemIDs">Item IDs used as seeds and excluded from the result.</param>
/// <param name="dataModel">Not used by this implementation.</param>
/// <returns>A newly created set of candidate item IDs.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel)
{
    FastIDSet candidates = new FastIDSet();
    for (int index = 0; index < preferredItemIDs.Length; index++)
    {
        long seedItemID = preferredItemIDs[index];
        candidates.addAll(this.similarity.allSimilarItemIDs(seedItemID));
    }

    candidates.removeAll(preferredItemIDs);
    return candidates;
}
/// <summary>
/// Collects the item IDs of every user in <paramref name="theNeighborhood"/> and removes the
/// item IDs that the data model returns for <paramref name="theUserID"/>.
/// </summary>
/// <param name="theNeighborhood">User IDs whose items are gathered.</param>
/// <param name="theUserID">User whose own items are excluded from the result.</param>
/// <returns>A newly created set of item IDs.</returns>
protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID)
{
    DataModel source = this.getDataModel();
    FastIDSet others = new FastIDSet();

    for (int index = 0; index < theNeighborhood.Length; index++)
    {
        long neighborID = theNeighborhood[index];
        others.addAll(source.getItemIDsFromUser(neighborID));
    }

    others.removeAll(source.getItemIDsFromUser(theUserID));
    return others;
}
/// <summary>
/// Adds item IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>.
/// </summary>
/// <remarks>
/// If the source set holds no more than <c>maxItemsPerUser</c> entries, all of them are added.
/// Otherwise the entries are read through a <c>SamplingLongPrimitiveIterator</c> built with the
/// ratio <c>maxItemsPerUser / itemIDs.size()</c>, and only what that iterator yields is added.
/// </remarks>
/// <param name="possibleItemIDs">Destination set; modified by this method.</param>
/// <param name="itemIDs">Source set of item IDs.</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
{
    if (itemIDs.size() <= this.maxItemsPerUser)
    {
        possibleItemIDs.addAll(itemIDs);
        return;
    }

    var sampler = new SamplingLongPrimitiveIterator(
        itemIDs.GetEnumerator(),
        (double)this.maxItemsPerUser / itemIDs.size());
    for (; sampler.MoveNext(); )
    {
        possibleItemIDs.add(sampler.Current);
    }
}
/// <summary>
/// Builds the candidate set from all items of every user who has a preference for any of the
/// preferred items, then removes the preferred items themselves.
/// </summary>
/// <param name="preferredItemIDs">Item IDs used as seeds and excluded from the result.</param>
/// <param name="dataModel">Source of per-item preferences and per-user item IDs.</param>
/// <returns>A newly created set of candidate item IDs.</returns>
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel)
{
    FastIDSet candidates = new FastIDSet();

    foreach (long seedItemID in preferredItemIDs)
    {
        PreferenceArray prefsForSeed = dataModel.getPreferencesForItem(seedItemID);
        int prefCount = prefsForSeed.length();
        int position = 0;
        while (position < prefCount)
        {
            long coUserID = prefsForSeed.getUserID(position);
            candidates.addAll(dataModel.getItemIDsFromUser(coUserID));
            position++;
        }
    }

    candidates.removeAll(preferredItemIDs);
    return candidates;
}
/// <summary>
/// Rebuilds <c>allRecommendableItemIDs</c> from the keys of <c>averageDiffs</c> and the keys of
/// each nested map it holds.
/// </summary>
/// <remarks>
/// The IDs are first gathered into a temporary set sized from <c>dataModel.getNumItems()</c>.
/// The shared set is then cleared, refilled from the temporary set and rehashed.
/// </remarks>
private void updateAllRecommendableItems()
{
    FastIDSet collected = new FastIDSet(dataModel.getNumItems());

    foreach (var diffEntry in averageDiffs.entrySet())
    {
        collected.add(diffEntry.Key);
        foreach (var pairedItemID in diffEntry.Value.Keys)
        {
            collected.add(pairedItemID);
        }
    }

    allRecommendableItemIDs.clear();
    allRecommendableItemIDs.addAll(collected);
    allRecommendableItemIDs.rehash();
}
/// <summary>
/// Computes the top recommendations for one cluster of users.
/// </summary>
/// <remarks>
/// The candidate items are the union of the item IDs of every member of the cluster. They are
/// passed to <c>TopItems.getTopItems</c> together with an <c>Estimator</c> built for this cluster,
/// asking for <c>NUM_CLUSTER_RECS</c> results with no rescorer (null).
/// </remarks>
/// <param name="cluster">User IDs that make up the cluster.</param>
/// <returns>The list returned by <c>TopItems.getTopItems</c>.</returns>
private List<RecommendedItem> computeTopRecsForCluster(FastIDSet cluster)
{
    DataModel source = getDataModel();

    FastIDSet candidateItemIDs = new FastIDSet();
    for (var members = cluster.GetEnumerator(); members.MoveNext(); )
    {
        candidateItemIDs.addAll(source.getItemIDsFromUser(members.Current));
    }

    TopItems.Estimator<long> clusterEstimator = new Estimator(cluster, this);
    List<RecommendedItem> best = TopItems.getTopItems(
        NUM_CLUSTER_RECS,
        candidateItemIDs.GetEnumerator(),
        null,
        clusterEstimator);

    log.debug("Recommendations are: {}", best);
    return best;
}
/// <summary>
/// Creates the model from a user-to-items map and builds the derived lookups.
/// </summary>
/// <remarks>
/// <paramref name="userData"/> is stored as-is (not copied) as the user-to-items map. From it the
/// constructor derives the inverse item-to-users map, a sorted array of all item IDs and a sorted
/// array of all user IDs.
/// </remarks>
/// <param name="userData">Map from user ID to the set of that user's item IDs.</param>
/// <param name="timestamps">Stored unchanged in the <c>timestamps</c> member.</param>
public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<DateTime?>> timestamps)
{
    this.preferenceFromUsers = userData;
    this.preferenceForItems = new FastByIDMap<FastIDSet>();

    // Single pass over the users: gather every item ID and invert the mapping.
    FastIDSet allItemIDs = new FastIDSet();
    foreach (KeyValuePair<long, FastIDSet> userEntry in this.preferenceFromUsers.entrySet())
    {
        long userID = userEntry.Key;
        FastIDSet itemsOfUser = userEntry.Value;
        allItemIDs.addAll(itemsOfUser);

        for (IEnumerator<long> items = itemsOfUser.GetEnumerator(); items.MoveNext(); )
        {
            long itemID = items.Current;
            FastIDSet usersOfItem = this.preferenceForItems.get(itemID);
            if (usersOfItem == null)
            {
                usersOfItem = new FastIDSet(2);
                this.preferenceForItems.put(itemID, usersOfItem);
            }
            usersOfItem.add(userID);
        }
    }

    this.itemIDs = allItemIDs.toArray();
    Array.Sort<long>(this.itemIDs);

    this.userIDs = new long[userData.size()];
    int slot = 0;
    for (IEnumerator<long> users = userData.Keys.GetEnumerator(); users.MoveNext(); slot++)
    {
        this.userIDs[slot] = users.Current;
    }
    Array.Sort<long>(this.userIDs);

    this.timestamps = timestamps;
}
/// <summary>
/// Performs one round of agglomerative merging over <paramref name="clusters"/>, in place.
/// </summary>
/// <remarks>
/// A list of candidate cluster pairs is obtained from <c>findClosestClusters</c> and consumed from
/// the front, one pair at a time. Each consumed pair is merged into a single cluster. The list is
/// treated as ordered best-first; new pairs are inserted so that descending similarity order is
/// kept (this assumes <c>findClosestClusters</c> returns it sorted that way).
///
/// The round ends with <c>done = true</c> when either
/// the cluster count has dropped to <c>numClusters</c> (count-based mode), or
/// the head pair's similarity is below <c>clusteringThreshold</c> (threshold mode).
/// If the candidate list simply runs out, the incoming <paramref name="done"/> value is returned
/// unchanged so that the caller can start another round.
/// </remarks>
/// <param name="numUsers">Passed through to <c>findClosestClusters</c>.</param>
/// <param name="clusters">Current clusters; merged clusters replace their two parts.</param>
/// <param name="done">Value to return if no stop condition is reached in this round.</param>
/// <returns>True when a stop condition was reached, otherwise <paramref name="done"/>.</returns>
private bool mergeClosestClusters(int numUsers, List<FastIDSet> clusters, bool done)
{
    // We find a certain number of closest clusters...
    List<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);

    // The first one is definitely the closest pair in existence so we can cluster
    // the two together, put it back into the set of clusters, and start again. Instead
    // we assume everything else in our list of closest cluster pairs is still pretty good,
    // and we cluster them too.
    while (queue.Count > 0)
    {
        if (!clusteringByThreshold && clusters.Count <= numClusters)
        {
            done = true;
            break;
        }

        // Always consume the head of the list: it is the best remaining pair.
        ClusterClusterPair top = queue[0];
        queue.RemoveAt(0);

        if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold)
        {
            done = true;
            break;
        }

        FastIDSet cluster1 = top.getCluster1();
        FastIDSet cluster2 = top.getCluster2();

        // Pull the two clusters out of the working list. The comparison deliberately uses ==
        // rather than Equals, so only these exact instances are removed.
        bool removed1 = false;
        bool removed2 = false;
        for (int m = 0; m < clusters.Count && !(removed1 && removed2); m++)
        {
            FastIDSet current = clusters[m];
            if (!removed1 && cluster1 == current)
            {
                clusters.RemoveAt(m);
                m--; // stay on this index: the next element has shifted into it
                removed1 = true;
            }
            else if (!removed2 && cluster2 == current)
            {
                clusters.RemoveAt(m);
                m--;
                removed2 = true;
            }
        }

        // The only catch is if a cluster showed up twice in the list of best cluster pairs;
        // every remaining pair that references either merged cluster is now stale.
        // Iterate backwards so that removals do not skip elements.
        for (int k = queue.Count - 1; k >= 0; k--)
        {
            ClusterClusterPair pair = queue[k];
            FastIDSet pair1 = pair.getCluster1();
            FastIDSet pair2 = pair.getCluster2();
            if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2)
            {
                queue.RemoveAt(k);
            }
        }

        // Make new merged cluster
        FastIDSet merged = new FastIDSet(cluster1.size() + cluster2.size());
        merged.addAll(cluster1);
        merged.addAll(cluster2);

        // Compare against other clusters; update queue if needed.
        // The new cluster might be pretty close to something else, so catch that case here
        // and put the pair into the queue at its sorted position.
        for (int i = 0; i < clusters.Count; i++)
        {
            FastIDSet cluster = clusters[i];
            double similarity = clusterSimilarity.getSimilarity(merged, cluster);

            // With an empty queue there is no worst entry to beat; such pairs are left for the
            // next round, which recomputes the candidate list from scratch.
            if (queue.Count > 0 && similarity > queue[queue.Count - 1].getSimilarity())
            {
                int insertAt = 0;
                while (insertAt < queue.Count && !(similarity > queue[insertAt].getSimilarity()))
                {
                    insertAt++;
                }
                queue.Insert(insertAt, new ClusterClusterPair(merged, cluster, similarity));
            }
        }

        // Finally add new cluster to list
        clusters.Add(merged);
    }

    return done;
}