/// <summary>
/// Computes the similarity of two users as the number of items both users have divided by
/// the number of items either user has (intersection size over union size).
/// </summary>
/// <param name="userID1">ID of the first user</param>
/// <param name="userID2">ID of the second user</param>
/// <returns>
/// <c>double.NaN</c> when neither user has any items or when the users share no items;
/// <c>0.0</c> when only one of the two users has no items; otherwise intersection / union.
/// </returns>
public double userSimilarity(long userID1, long userID2)
{
    DataModel dataModel = base.getDataModel();
    FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
    FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);

    int firstCount = firstItems.size();
    int secondCount = secondItems.size();
    bool firstEmpty = firstCount == 0;
    bool secondEmpty = secondCount == 0;

    if (firstEmpty && secondEmpty)
    {
        return double.NaN;
    }
    if (firstEmpty || secondEmpty)
    {
        return 0.0;
    }

    // intersectionSize is always invoked on the set that is not the smaller one.
    int sharedCount;
    if (firstCount < secondCount)
    {
        sharedCount = secondItems.intersectionSize(firstItems);
    }
    else
    {
        sharedCount = firstItems.intersectionSize(secondItems);
    }

    if (sharedCount == 0)
    {
        return double.NaN;
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    int unionCount = (firstCount + secondCount) - sharedCount;
    return ((double)sharedCount) / ((double)unionCount);
}
/// <summary>
/// Runs the base class preparation, then allocates and initialises the <c>p</c> matrix (one
/// row per user), the <c>y</c> matrix (one row per item) and the <c>itemsByUser</c> lookup.
/// </summary>
/// <remarks>
/// In every row the first <c>RatingSGDFactorizer.FEATURE_OFFSET</c> entries are set to zero and
/// the remaining entries are set to a Gaussian sample multiplied by <c>base.randomNoise</c>.
/// <c>itemsByUser</c> maps each user index to the list of item indexes of that user's items.
/// </remarks>
protected override void prepareTraining()
{
    base.prepareTraining();
    RandomWrapper random = RandomUtils.getRandom();

    // One feature row per user.
    this.p = new double[base.dataModel.getNumUsers()][];
    for (int row = 0; row < this.p.Length; row++)
    {
        this.p[row] = new double[base.numFeatures];
        for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
        {
            this.p[row][feature] = 0.0;
        }
        for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
        {
            this.p[row][feature] = random.nextGaussian() * base.randomNoise;
        }
    }

    // One feature row per item, initialised the same way.
    this.y = new double[base.dataModel.getNumItems()][];
    for (int row = 0; row < this.y.Length; row++)
    {
        this.y[row] = new double[base.numFeatures];
        for (int feature = 0; feature < RatingSGDFactorizer.FEATURE_OFFSET; feature++)
        {
            this.y[row][feature] = 0.0;
        }
        for (int feature = RatingSGDFactorizer.FEATURE_OFFSET; feature < base.numFeatures; feature++)
        {
            this.y[row][feature] = random.nextGaussian() * base.randomNoise;
        }
    }

    // Cache, per user index, the item indexes of everything that user has.
    this.itemsByUser = new Dictionary<int, List<int>>();
    for (IEnumerator<long> userIDs = base.dataModel.getUserIDs(); userIDs.MoveNext(); )
    {
        long userID = userIDs.Current;
        int userIdx = base.userIndex(userID);
        FastIDSet itemIDs = base.dataModel.getItemIDsFromUser(userID);

        List<int> itemIndexes = new List<int>(itemIDs.size());
        this.itemsByUser[userIdx] = itemIndexes;
        foreach (long itemID in itemIDs)
        {
            itemIndexes.Add(base.itemIndex(itemID));
        }
    }
}
/// <summary>
/// Computes the similarity of two users from the sizes of their item sets and the size of
/// the intersection of those sets; the actual formula is supplied by <c>doSimilarity</c>.
/// </summary>
/// <param name="userID1">ID of the first user</param>
/// <param name="userID2">ID of the second user</param>
/// <returns>the value returned by <c>doSimilarity(size1, size2, intersectionSize)</c></returns>
public double userSimilarity(long userID1, long userID2)
{
    DataModel dataModel = base.getDataModel();
    FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
    FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);

    int firstCount = firstItems.size();
    int secondCount = secondItems.size();

    // intersectionSize is always invoked on the set that is not the smaller one.
    int sharedCount;
    if (firstCount < secondCount)
    {
        sharedCount = secondItems.intersectionSize(firstItems);
    }
    else
    {
        sharedCount = firstItems.intersectionSize(secondItems);
    }

    return doSimilarity(firstCount, secondCount, sharedCount);
}
/// <summary>
/// Repeatedly merges the nearest pair of clusters in <paramref name="newClusters"/>, in place.
/// </summary>
/// <remarks>
/// When <c>clusteringByThreshold</c> is set, merging continues while the nearest pair's
/// similarity is at least <c>clusteringThreshold</c>. Otherwise merging continues while there
/// are more than <c>numClusters</c> clusters. In both modes merging stops as soon as
/// <c>findNearestClusters</c> yields a pair with a null member.
/// </remarks>
/// <param name="newClusters">list of clusters to merge; modified by this call</param>
private void findClusters(List<FastIDSet> newClusters)
{
    if (clusteringByThreshold)
    {
        KeyValuePair<FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
        // Null check first, so the similarity is only requested for a complete pair.
        while (nearest.Key != null
               && nearest.Value != null
               && clusterSimilarity.getSimilarity(nearest.Key, nearest.Value) >= clusteringThreshold)
        {
            FastIDSet left = nearest.Key;
            FastIDSet right = nearest.Value;

            newClusters.Remove(left);
            newClusters.Remove(right);

            FastIDSet union = new FastIDSet(left.size() + right.size());
            union.addAll(left);
            union.addAll(right);
            newClusters.Add(union);

            nearest = findNearestClusters(newClusters);
        }
        return;
    }

    while (newClusters.Count > numClusters)
    {
        KeyValuePair<FastIDSet, FastIDSet> nearest = findNearestClusters(newClusters);
        FastIDSet left = nearest.Key;
        FastIDSet right = nearest.Value;
        if (left == null || right == null)
        {
            break;
        }

        newClusters.Remove(left);
        newClusters.Remove(right);

        FastIDSet union = new FastIDSet(left.size() + right.size());
        union.addAll(left);
        union.addAll(right);
        newClusters.Add(union);
    }
}
/// <summary>
/// Adds item IDs from <paramref name="itemIDs"/> to <paramref name="possibleItemIDs"/>.
/// </summary>
/// <remarks>
/// If <paramref name="itemIDs"/> holds no more than <c>maxItemsPerUser</c> entries, all of them
/// are added. Otherwise only the IDs produced by a <c>SamplingLongPrimitiveIterator</c>
/// constructed with the rate <c>maxItemsPerUser / itemIDs.size()</c> are added.
/// </remarks>
/// <param name="possibleItemIDs">destination set; modified by this call</param>
/// <param name="itemIDs">source item IDs</param>
private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs)
{
    if (!(itemIDs.size() > this.maxItemsPerUser))
    {
        possibleItemIDs.addAll(itemIDs);
        return;
    }

    SamplingLongPrimitiveIterator sampled = new SamplingLongPrimitiveIterator(
        itemIDs.GetEnumerator(),
        ((double)this.maxItemsPerUser) / ((double)itemIDs.size()));
    for (; sampled.MoveNext(); )
    {
        possibleItemIDs.add(sampled.Current);
    }
}
/// <summary>
/// Collects up to <paramref name="at"/> item IDs of the given user whose preference value is
/// at least <paramref name="relevanceThreshold"/>.
/// </summary>
/// <remarks>
/// <c>sortByValueReversed()</c> is called on the preference array returned by the data model
/// before it is scanned. NOTE(review): whether that reorders data shared with the model
/// depends on <c>getPreferencesFromUser</c> - confirm.
/// </remarks>
/// <param name="userID">user whose preferences are examined</param>
/// <param name="at">maximum number of item IDs to return</param>
/// <param name="relevanceThreshold">minimum preference value for an item to be included</param>
/// <param name="dataModel">source of the user's preferences</param>
/// <returns>a new set containing the selected item IDs</returns>
public FastIDSet getRelevantItemsIDs(long userID, int at, double relevanceThreshold, DataModel dataModel)
{
    PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
    FastIDSet relevant = new FastIDSet(at);
    prefs.sortByValueReversed();

    int position = 0;
    while ((position < prefs.length()) && (relevant.size() < at))
    {
        if (prefs.getValue(position) >= relevanceThreshold)
        {
            relevant.add(prefs.getItemID(position));
        }
        position++;
    }
    return relevant;
}
/// <summary>
/// Counts the users recorded in <c>preferenceForItems</c> for both given items.
/// </summary>
/// <param name="itemID1">first item ID</param>
/// <param name="itemID2">second item ID</param>
/// <returns>
/// 0 if <c>preferenceForItems</c> has no entry for either item; otherwise the size of the
/// intersection of the two items' ID sets.
/// </returns>
public override int getNumUsersWithPreferenceFor(long itemID1, long itemID2)
{
    FastIDSet firstUsers = this.preferenceForItems.get(itemID1);
    if (firstUsers == null)
    {
        return 0;
    }

    FastIDSet secondUsers = this.preferenceForItems.get(itemID2);
    if (secondUsers == null)
    {
        return 0;
    }

    // intersectionSize is always invoked on the set that is not the smaller one.
    if (firstUsers.size() < secondUsers.size())
    {
        return secondUsers.intersectionSize(firstUsers);
    }
    return firstUsers.intersectionSize(secondUsers);
}
/// <summary>
/// Computes the similarity of two users from the log-likelihood ratio of their item overlap.
/// </summary>
/// <param name="userID1">ID of the first user</param>
/// <param name="userID2">ID of the second user</param>
/// <returns>
/// <c>double.NaN</c> when the users share no items; otherwise <c>1 - 1 / (1 + llr)</c>, where
/// <c>llr</c> is the value returned by <c>LogLikelihood.logLikelihoodRatio</c>.
/// </returns>
public double userSimilarity(long userID1, long userID2)
{
    DataModel dataModel = base.getDataModel();
    FastIDSet firstItems = dataModel.getItemIDsFromUser(userID1);
    FastIDSet secondItems = dataModel.getItemIDsFromUser(userID2);

    long firstCount = firstItems.size();
    long secondCount = secondItems.size();

    // intersectionSize is always invoked on the set that is not the smaller one.
    long sharedCount;
    if (firstCount < secondCount)
    {
        sharedCount = (long)secondItems.intersectionSize(firstItems);
    }
    else
    {
        sharedCount = (long)firstItems.intersectionSize(secondItems);
    }

    if (sharedCount == 0L)
    {
        return double.NaN;
    }

    long totalItems = dataModel.getNumItems();

    // Counts passed on: items both users have, items only user 2 has, items only user 1 has,
    // and items neither user has.
    long onlySecond = secondCount - sharedCount;
    long onlyFirst = firstCount - sharedCount;
    long neither = ((totalItems - firstCount) - secondCount) + sharedCount;
    double logLikelihood = LogLikelihood.logLikelihoodRatio(sharedCount, onlySecond, onlyFirst, neither);

    return 1.0 - (1.0 / (1.0 + logLikelihood));
}
/// <summary>
/// Builds the recommendation list for a cluster of users.
/// </summary>
/// <remarks>
/// The candidate items are the union of the item IDs of every user ID in the cluster. They are
/// ranked by <c>TopItems.getTopItems</c> using an <c>Estimator</c> constructed for this
/// cluster; the requested list size equals the number of candidates and no rescorer is passed.
/// </remarks>
/// <param name="cluster">user IDs that form the cluster</param>
/// <returns>the list returned by <c>TopItems.getTopItems</c></returns>
private List<RecommendedItem> computeTopRecsForCluster(FastIDSet cluster)
{
    DataModel model = getDataModel();

    FastIDSet candidates = new FastIDSet();
    for (var members = cluster.GetEnumerator(); members.MoveNext(); )
    {
        candidates.addAll(model.getItemIDsFromUser(members.Current));
    }

    TopItems.Estimator<long> clusterEstimator = new Estimator(this, cluster);
    List<RecommendedItem> recommendations = TopItems.getTopItems(
        candidates.size(),
        candidates.GetEnumerator(),
        null,
        clusterEstimator);

    log.debug("Recommendations are: {}", recommendations);
    return recommendations;
}
/// <summary>
/// Builds a preference array for a user from the item IDs stored in <c>preferenceFromUsers</c>.
/// </summary>
/// <param name="userID">user whose preferences are requested</param>
/// <returns>
/// a new <c>BooleanUserPreferenceArray</c> with one entry per stored item ID, each entry
/// carrying <paramref name="userID"/> and the item ID
/// </returns>
/// <exception cref="NoSuchUserException">
/// thrown when <c>preferenceFromUsers</c> has no entry for <paramref name="userID"/>
/// </exception>
public override PreferenceArray getPreferencesFromUser(long userID)
{
    FastIDSet itemIDs = this.preferenceFromUsers.get(userID);
    if (itemIDs == null)
    {
        throw new NoSuchUserException(userID);
    }

    PreferenceArray prefs = new BooleanUserPreferenceArray(itemIDs.size());
    IEnumerator<long> items = itemIDs.GetEnumerator();
    for (int slot = 0; items.MoveNext(); slot++)
    {
        prefs.setUserID(slot, userID);
        prefs.setItemID(slot, items.Current);
    }
    return prefs;
}
/// <summary>
/// Performs one round of merging: obtains candidate cluster pairs from
/// <c>findClosestClusters</c> and merges them one pair at a time, always taking the pair at
/// the head of the candidate list, until the list is exhausted or a stop condition is met.
/// </summary>
/// <remarks>
/// NOTE(review): this assumes <c>findClosestClusters</c> returns pairs ordered from most to
/// least similar, as the comments in the body state - confirm against that method.
/// </remarks>
/// <param name="numUsers">passed through to <c>findClosestClusters</c></param>
/// <param name="clusters">
/// current clusters; modified in place (each merged pair is removed and its union appended)
/// </param>
/// <param name="done">value returned unchanged when no stop condition is met</param>
/// <returns>
/// <c>true</c> when, not clustering by threshold, the cluster count has dropped to
/// <c>numClusters</c> or below, or when, clustering by threshold, the best remaining pair is
/// less similar than <c>clusteringThreshold</c>; otherwise <paramref name="done"/>.
/// </returns>
private bool mergeClosestClusters(int numUsers, List<FastIDSet> clusters, bool done)
{
    // We find a certain number of closest clusters...
    List<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);

    // The first one is definitely the closest pair in existence so we can cluster
    // the two together, put it back into the set of clusters, and start again. Instead
    // we assume everything else in our list of closest cluster pairs is still pretty good,
    // and we cluster them too.
    while (queue.Count > 0)
    {
        if (!clusteringByThreshold && clusters.Count <= numClusters)
        {
            done = true;
            break;
        }

        // Always consume the head of the queue; never index by a moving loop counter.
        ClusterClusterPair top = queue[0];
        queue.RemoveAt(0);

        if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold)
        {
            done = true;
            break;
        }

        FastIDSet cluster1 = top.getCluster1();
        FastIDSet cluster2 = top.getCluster2();

        // Pull out current two clusters from clusters
        bool removed1 = false;
        bool removed2 = false;
        for (int m = 0; m < clusters.Count && !(removed1 && removed2); m++)
        {
            FastIDSet current = clusters[m];
            // Yes, use == here
            if (!removed1 && cluster1 == current)
            {
                clusters.RemoveAt(m);
                m--; // re-examine the element that slid into this slot
                removed1 = true;
            }
            else if (!removed2 && cluster2 == current)
            {
                clusters.RemoveAt(m);
                m--;
                removed2 = true;
            }
        }

        // The only catch is if a cluster showed it twice in the list of best cluster pairs;
        // have to remove the others. Pull out anything referencing these clusters from queue.
        // RemoveAll visits every element, unlike a forward loop that removes by index.
        queue.RemoveAll(pair =>
        {
            FastIDSet pair1 = pair.getCluster1();
            FastIDSet pair2 = pair.getCluster2();
            return pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2;
        });

        // Make new merged cluster
        FastIDSet merged = new FastIDSet(cluster1.size() + cluster2.size());
        merged.addAll(cluster1);
        merged.addAll(cluster2);

        // Compare against other clusters; update queue if needed
        // That new pair we're just adding might be pretty close to something else, so
        // catch that case here and put it back into our queue.
        // With an empty queue there is no last element to compare against, so nothing is
        // re-queued and the loop ends after this merge.
        if (queue.Count > 0)
        {
            for (int i = 0; i < clusters.Count; i++)
            {
                FastIDSet cluster = clusters[i];
                double similarity = clusterSimilarity.getSimilarity(merged, cluster);
                if (similarity > queue[queue.Count - 1].getSimilarity())
                {
                    // Insert in front of the first queued pair that is less similar. The scan
                    // always stops because the last pair is known to be less similar.
                    int insertAt = 0;
                    while (!(similarity > queue[insertAt].getSimilarity()))
                    {
                        insertAt++;
                    }
                    queue.Insert(insertAt, new ClusterClusterPair(merged, cluster, similarity));
                }
            }
        }

        // Finally add new cluster to list
        clusters.Add(merged);
    }

    return done;
}
/// <summary>
/// Evaluates a recommender per user and aggregates precision, recall, fall-out, nDCG and
/// reach over the evaluated users.
/// </summary>
/// <remarks>
/// For each user selected for evaluation, the user's relevant items are obtained from
/// <c>dataSplitter</c>, a training model is built from the output of
/// <c>dataSplitter.processOtherUser</c>, a recommender is built on that model, and its
/// recommendations are compared against the relevant items. A user is skipped when it has no
/// relevant items, when the training model throws <c>NoSuchUserException</c> for it, or when
/// its relevant items plus its training-model items number fewer than <c>2 * at</c>.
/// </remarks>
/// <param name="recommenderBuilder">builds the recommender from each training model</param>
/// <param name="dataModelBuilder">
/// builds the training model; when null a <c>GenericDataModel</c> is used
/// </param>
/// <param name="dataModel">full data set to evaluate against</param>
/// <param name="rescorer">passed through to <c>recommend</c></param>
/// <param name="at">number of recommendations requested and cap on relevant items</param>
/// <param name="relevanceThreshold">
/// minimum value for an item to be relevant; when NaN it is computed per user with
/// <c>computeThreshold</c>
/// </param>
/// <param name="evaluationPercentage">
/// a user is evaluated only when <c>random.nextDouble()</c> is below this value
/// </param>
/// <returns>
/// an <c>IRStatisticsImpl</c> built from the four running averages and the ratio of users
/// with at least one recommendation to users evaluated
/// </returns>
public IRStatistics evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder, DataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold, double evaluationPercentage)
{
    int totalItems = dataModel.getNumItems();
    RunningAverage precision = new FullRunningAverage();
    RunningAverage recall = new FullRunningAverage();
    RunningAverage fallOut = new FullRunningAverage();
    RunningAverage nDCG = new FullRunningAverage();
    int usersEvaluated = 0;
    int usersWithRecommendations = 0;

    for (IEnumerator<long> users = dataModel.getUserIDs(); users.MoveNext(); )
    {
        long userID = users.Current;
        if (!(this.random.nextDouble() < evaluationPercentage))
        {
            continue;
        }

        Stopwatch timer = new Stopwatch();
        timer.Start();

        PreferenceArray userPrefs = dataModel.getPreferencesFromUser(userID);
        double threshold;
        if (double.IsNaN(relevanceThreshold))
        {
            threshold = computeThreshold(userPrefs);
        }
        else
        {
            threshold = relevanceThreshold;
        }

        FastIDSet relevantItemIDs = this.dataSplitter.getRelevantItemsIDs(userID, at, threshold, dataModel);
        int relevantCount = relevantItemIDs.size();
        if (relevantCount <= 0)
        {
            continue;
        }

        // Build the training data by letting the splitter process every user.
        FastByIDMap<PreferenceArray> trainingUsers = new FastByIDMap<PreferenceArray>(dataModel.getNumUsers());
        for (IEnumerator<long> others = dataModel.getUserIDs(); others.MoveNext(); )
        {
            this.dataSplitter.processOtherUser(userID, relevantItemIDs, trainingUsers, others.Current, dataModel);
        }

        DataModel trainingModel;
        if (dataModelBuilder == null)
        {
            trainingModel = new GenericDataModel(trainingUsers);
        }
        else
        {
            trainingModel = dataModelBuilder.buildDataModel(trainingUsers);
        }

        try
        {
            trainingModel.getPreferencesFromUser(userID);
        }
        catch (NoSuchUserException)
        {
            // The user is absent from the training model; move on to the next user.
            continue;
        }

        int prefCount = relevantCount + trainingModel.getItemIDsFromUser(userID).size();
        if (prefCount < (2 * at))
        {
            continue;
        }

        Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
        List<RecommendedItem> recommended = recommender.recommend(userID, at, rescorer);

        int hits = 0;
        foreach (RecommendedItem candidate in recommended)
        {
            if (relevantItemIDs.contains(candidate.getItemID()))
            {
                hits++;
            }
        }
        int recommendedCount = recommended.Count;

        // Precision
        if (recommendedCount > 0)
        {
            precision.addDatum(((double)hits) / ((double)recommendedCount));
        }

        // Recall
        recall.addDatum(((double)hits) / ((double)relevantCount));

        // Fall-out
        if (relevantCount < prefCount)
        {
            fallOut.addDatum(((double)(recommendedCount - hits)) / ((double)(totalItems - relevantCount)));
        }

        // nDCG: discounted gain of the actual list against a list whose first
        // relevantCount positions all count as hits.
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int rank = 0; rank < recommendedCount; rank++)
        {
            RecommendedItem candidate = recommended[rank];
            double discount = 1.0 / log2(rank + 2.0);
            if (relevantItemIDs.contains(candidate.getItemID()))
            {
                cumulativeGain += discount;
            }
            if (rank < relevantCount)
            {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0)
        {
            nDCG.addDatum(cumulativeGain / idealizedGain);
        }

        // Reach
        usersEvaluated++;
        if (recommendedCount > 0)
        {
            usersWithRecommendations++;
        }

        timer.Stop();
        log.info("Evaluated with user {} in {}ms", new object[] { userID, timer.ElapsedMilliseconds });
        log.info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}", new object[] { precision.getAverage(), recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(), ((double)usersWithRecommendations) / ((double)usersEvaluated) });
    }

    return new IRStatisticsImpl(precision.getAverage(), recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(), ((double)usersWithRecommendations) / ((double)usersEvaluated));
}
/// <summary>
/// Counts the users recorded in <c>preferenceForItems</c> for the given item.
/// </summary>
/// <param name="itemID">item ID to look up</param>
/// <returns>
/// 0 if <c>preferenceForItems</c> has no entry for the item; otherwise the size of its ID set
/// </returns>
public override int getNumUsersWithPreferenceFor(long itemID)
{
    FastIDSet userIDs = this.preferenceForItems.get(itemID);
    if (userIDs == null)
    {
        return 0;
    }
    return userIDs.size();
}