/// <summary>
/// Builds a new item preference array containing every entry of
/// <paramref name="delegatePrefs"/> plus one extra (user, value) entry. The extra entry is
/// placed in front of the first existing entry whose user ID is not smaller than
/// <paramref name="newUserID"/>.
/// </summary>
/// <param name="delegatePrefs">Existing preferences; <c>null</c> is treated as an empty array.</param>
/// <param name="itemID">Item ID written to the new array.</param>
/// <param name="newUserID">User ID of the entry being added.</param>
/// <param name="value">Preference value of the entry being added.</param>
/// <returns>A newly allocated array that is one element longer than the input.</returns>
private static IPreferenceArray cloneAndMergeInto(IPreferenceArray delegatePrefs, long itemID, long newUserID, float value) {
    int existingCount = (delegatePrefs != null) ? delegatePrefs.Length() : 0;
    int mergedCount = existingCount + 1;
    IPreferenceArray merged = new GenericItemPreferenceArray(mergedCount);
    // Set item ID once
    merged.SetItemID(0, itemID);

    // Locate the slot for the new entry: skip every existing user ID smaller than the new one.
    int insertAt = 0;
    while (insertAt < existingCount && delegatePrefs.GetUserID(insertAt) < newUserID) {
        insertAt++;
    }

    // Entries in front of the slot keep their index.
    for (int src = 0; src < insertAt; src++) {
        merged.SetUserID(src, delegatePrefs.GetUserID(src));
        merged.SetValue(src, delegatePrefs.GetValue(src));
    }

    merged.SetUserID(insertAt, newUserID);
    merged.SetValue(insertAt, value);

    // Entries from the slot onwards move one position to the right.
    for (int src = insertAt; src < existingCount; src++) {
        merged.SetUserID(src + 1, delegatePrefs.GetUserID(src));
        merged.SetValue(src + 1, delegatePrefs.GetValue(src));
    }

    return merged;
}
/// <summary>
/// Returns the preferences for an item, combining what the delegate model knows with the
/// temporary preferences held in <c>tempPrefs</c> (when any are set).
/// </summary>
/// <param name="itemID">Item whose preferences are requested.</param>
/// <returns>
/// The delegate's array when <c>tempPrefs</c> is null or holds no entry for the item;
/// otherwise a merged copy that also contains the first matching temporary entry.
/// </returns>
/// <exception cref="NoSuchItemException">
/// Thrown when temporary preferences are set but neither they nor the delegate know the item.
/// </exception>
public virtual IPreferenceArray GetPreferencesForItem(long itemID) {
    if (tempPrefs == null) {
        return _delegate.GetPreferencesForItem(itemID);
    }

    IPreferenceArray fromDelegate = null;
    try {
        fromDelegate = _delegate.GetPreferencesForItem(itemID);
    } catch (NoSuchItemException) {
        // OK. Probably an item that only the anonymous user has
        log.Debug("Item {} unknown", itemID);
    }

    // The first temporary preference for this item (if any) is merged into the result.
    int idx = 0;
    while (idx < tempPrefs.Length()) {
        if (tempPrefs.GetItemID(idx) == itemID) {
            return cloneAndMergeInto(fromDelegate, itemID, tempPrefs.GetUserID(idx), tempPrefs.GetValue(idx));
        }
        idx++;
    }

    if (fromDelegate != null) {
        return fromDelegate;
    }

    // No, didn't find it among the anonymous user prefs
    throw new NoSuchItemException(itemID);
}
/// <summary>
/// Walks every preference of every user in the data model and feeds each value into the
/// running average stored in <c>itemAverages</c> for the corresponding item, creating the
/// average on first sight of an item. The whole pass runs while holding the lock on this instance.
/// </summary>
private void buildAverageDiffs() {
    lock (this) {
        IDataModel model = GetDataModel();
        var userIDs = model.GetUserIDs();
        while (userIDs.MoveNext()) {
            IPreferenceArray userPrefs = model.GetPreferencesFromUser(userIDs.Current);
            int prefCount = userPrefs.Length();
            for (int idx = 0; idx < prefCount; idx++) {
                long itemID = userPrefs.GetItemID(idx);

                // Lazily create the per-item accumulator.
                IRunningAverage itemAverage = itemAverages.Get(itemID);
                if (itemAverage == null) {
                    itemAverage = new FullRunningAverage();
                    itemAverages.Put(itemID, itemAverage);
                }

                itemAverage.AddDatum(userPrefs.GetValue(idx));
            }
        }
    }
}
/// <summary>
/// Estimates a preference for <paramref name="itemID"/> as the similarity-weighted average
/// of the values in <paramref name="preferencesFromUser"/>.
/// </summary>
/// <param name="userID">User the estimate is for (not read by this implementation).</param>
/// <param name="preferencesFromUser">The user's known preferences.</param>
/// <param name="itemID">Item to estimate a preference for.</param>
/// <returns>
/// The (optionally capped) estimate, or <c>float.NaN</c> when fewer than two of the
/// similarities are defined.
/// </returns>
protected virtual float doEstimatePreference(long userID, IPreferenceArray preferencesFromUser, long itemID) {
    double[] weights = similarity.ItemSimilarities(itemID, preferencesFromUser.GetIDs());

    double weightedSum = 0.0;
    double weightTotal = 0.0;
    int usable = 0;
    for (int idx = 0; idx < weights.Length; idx++) {
        double weight = weights[idx];
        if (Double.IsNaN(weight)) {
            continue;
        }
        // Weights can be negative!
        weightedSum += weight * preferencesFromUser.GetValue(idx);
        weightTotal += weight;
        usable++;
    }

    // An estimate built on zero data points is meaningless, and one built on a single
    // point is just the user's rating for that one item regardless of the similarity
    // score, so both cases are discarded.
    if (usable <= 1) {
        return float.NaN;
    }

    float estimate = (float)(weightedSum / weightTotal);
    if (capper == null) {
        return estimate;
    }
    return capper.capEstimate(estimate);
}
/// <summary>
/// Creates the recommender and scans every preference of every user in
/// <paramref name="dataModel"/> to record the smallest and largest preference value seen.
/// </summary>
/// <remarks>
/// When the model contains no preferences at all, <c>minPref</c> stays at positive infinity
/// and <c>maxPref</c> at negative infinity.
/// </remarks>
/// <param name="dataModel">Data model to read preferences from.</param>
public RandomRecommender(IDataModel dataModel) : base(dataModel) {
    float highest = float.NegativeInfinity;
    float lowest = float.PositiveInfinity;

    var users = dataModel.GetUserIDs();
    while (users.MoveNext()) {
        IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(users.Current);
        int idx = 0;
        while (idx < userPrefs.Length()) {
            float current = userPrefs.GetValue(idx);
            if (current < lowest) {
                lowest = current;
            }
            if (current > highest) {
                highest = current;
            }
            idx++;
        }
    }

    this.minPref = lowest;
    this.maxPref = highest;
}
/// <summary>
/// Looks up the preference value recorded for <paramref name="itemID"/> in a user's
/// preference array by scanning it from the start.
/// </summary>
/// <param name="preferencesFromUser">Preferences to search.</param>
/// <param name="itemID">Item to look for.</param>
/// <returns>The value of the first entry for the item, or <c>null</c> when there is none.</returns>
private static float? getPreferenceForItem(IPreferenceArray preferencesFromUser, long itemID) {
    int total = preferencesFromUser.Length();

    // Advance until the item is found or the array is exhausted.
    int idx = 0;
    while (idx < total && preferencesFromUser.GetItemID(idx) != itemID) {
        idx++;
    }

    if (idx < total) {
        return preferencesFromUser.GetValue(idx);
    }
    return null;
}
/// <summary>
/// Returns the preference value a user has expressed for an item.
/// </summary>
/// <param name="userID">User whose preferences are searched.</param>
/// <param name="itemID">Item to look for.</param>
/// <returns>The value of the first matching entry, or <c>null</c> when the user has none for the item.</returns>
public override float? GetPreferenceValue(long userID, long itemID) {
    IPreferenceArray userPrefs = GetPreferencesFromUser(userID);
    int total = userPrefs.Length();
    for (int idx = 0; idx < total; idx++) {
        if (userPrefs.GetItemID(idx) != itemID) {
            continue;
        }
        return userPrefs.GetValue(idx);
    }
    return null;
}
/// <summary>
/// Returns the preference value a user has expressed for an item. Users recognised by
/// <c>isAnonymousUser</c> are answered from <c>tempPrefs</c>; all others are forwarded to
/// the delegate model.
/// </summary>
/// <param name="userID">User whose preference is requested.</param>
/// <param name="itemID">Item to look for.</param>
/// <returns>
/// The stored value, or <c>null</c> when an anonymous user has no entry for the item;
/// for other users, whatever the delegate returns.
/// </returns>
public override float? GetPreferenceValue(long userID, long itemID) {
    if (!isAnonymousUser(userID)) {
        return getDelegate().GetPreferenceValue(userID, itemID);
    }

    IPreferenceArray anonymousPrefs = tempPrefs[userID];
    int idx = 0;
    while (idx < anonymousPrefs.Length()) {
        if (anonymousPrefs.GetItemID(idx) == itemID) {
            return anonymousPrefs.GetValue(idx);
        }
        idx++;
    }
    return null;
}
/// <summary>
/// Derives a relevance threshold from a set of preferences: the mean of the values plus
/// one standard deviation.
/// </summary>
/// <param name="prefs">Preferences to derive the threshold from.</param>
/// <returns>
/// Mean plus standard deviation of the values, or <c>Double.NegativeInfinity</c> when there
/// are fewer than two values.
/// </returns>
private static double computeThreshold(IPreferenceArray prefs) {
    if (prefs.Length() >= 2) {
        IRunningAverageAndStdDev stats = new FullRunningAverageAndStdDev();
        int total = prefs.Length();
        int idx = 0;
        while (idx < total) {
            stats.AddDatum(prefs.GetValue(idx));
            idx++;
        }
        double mean = stats.GetAverage();
        double deviation = stats.GetStandardDeviation();
        return mean + deviation;
    }

    // Not enough data points -- return a threshold that allows everything
    return Double.NegativeInfinity;
}
/// <summary>
/// Computes the average of all preference values of the user identified by
/// <paramref name="key"/>.
/// </summary>
/// <param name="key">User ID passed to the data model.</param>
/// <returns>The average value, or <c>ZERO</c> when the user has no preferences.</returns>
public float Get(long key) {
    IPreferenceArray userPrefs = inf.dataModel.GetPreferencesFromUser(key);
    int total = userPrefs.Length();
    if (total == 0) {
        return ZERO;
    }

    IRunningAverage mean = new FullRunningAverage();
    int idx = 0;
    while (idx < total) {
        mean.AddDatum(userPrefs.GetValue(idx));
        idx++;
    }
    return (float)mean.GetAverage();
}
/// <summary>
/// Collects up to <paramref name="at"/> item IDs that a user rated at or above
/// <paramref name="relevanceThreshold"/>, considering the user's preferences in
/// descending order of value.
/// </summary>
/// <param name="userID">User whose preferences are examined.</param>
/// <param name="at">Maximum number of item IDs to return.</param>
/// <param name="relevanceThreshold">Minimum preference value for an item to count as relevant.</param>
/// <param name="dataModel">Data model to read the user's preferences from.</param>
/// <returns>The set of relevant item IDs; may hold fewer than <paramref name="at"/> entries.</returns>
public FastIDSet GetRelevantItemsIDs(long userID, int at, double relevanceThreshold, IDataModel dataModel) {
    // Sort a private copy. The array handed out by the data model may be the model's own
    // storage, and re-ordering it by value in place would disturb any other code that
    // reads the same array afterwards.
    IPreferenceArray prefs = dataModel.GetPreferencesFromUser(userID).Clone();
    prefs.SortByValueReversed();

    FastIDSet relevantItemIDs = new FastIDSet(at);
    for (int i = 0; i < prefs.Length() && relevantItemIDs.Count() < at; i++) {
        if (prefs.GetValue(i) >= relevanceThreshold) {
            relevantItemIDs.Add(prefs.GetItemID(i));
        }
    }
    return relevantItemIDs;
}
/// <summary>
/// Walks every preference of every user in the data model and records each value three
/// times: against the item (<c>itemAverages</c>), against the user (<c>userAverages</c>),
/// both via <c>addDatumAndCreateIfNeeded</c>, and in <c>overallAveragePrefValue</c>.
/// The whole pass runs while holding the lock on this instance.
/// </summary>
private void buildAverageDiffs() {
    lock (this) {
        IDataModel model = GetDataModel();
        var users = model.GetUserIDs();
        while (users.MoveNext()) {
            long currentUser = users.Current;
            IPreferenceArray userPrefs = model.GetPreferencesFromUser(currentUser);
            int total = userPrefs.Length();
            int idx = 0;
            while (idx < total) {
                long currentItem = userPrefs.GetItemID(idx);
                float rating = userPrefs.GetValue(idx);
                addDatumAndCreateIfNeeded(currentItem, rating, itemAverages);
                addDatumAndCreateIfNeeded(currentUser, rating, userAverages);
                overallAveragePrefValue.AddDatum(rating);
                idx++;
            }
        }
    }
}
/// <summary>
/// Computes a relevance threshold for a set of preferences as the average value plus one
/// standard deviation.
/// </summary>
/// <param name="prefs">Preferences whose values are summarised.</param>
/// <returns>
/// Average plus standard deviation, or <c>Double.NegativeInfinity</c> when fewer than two
/// preferences are supplied.
/// </returns>
private static double computeThreshold(IPreferenceArray prefs) {
    bool tooFewPoints = prefs.Length() < 2;
    if (tooFewPoints) {
        // Not enough data points -- return a threshold that allows everything
        return Double.NegativeInfinity;
    }

    IRunningAverageAndStdDev accumulator = new FullRunningAverageAndStdDev();
    int count = prefs.Length();
    for (int pos = 0; pos != count; pos++) {
        float rating = prefs.GetValue(pos);
        accumulator.AddDatum(rating);
    }

    return accumulator.GetAverage() + accumulator.GetStandardDeviation();
}
/// <summary>
/// Computes the similarity of two items from the preference values of the users that
/// appear in both items' preference arrays.
/// </summary>
/// <remarks>
/// Both arrays are walked in step by user ID; the walk only pairs up entries correctly if
/// each array is ordered by ascending user ID (NOTE(review): not checked here - confirm
/// against the data model). The accumulated sums are handed to <c>computeResult</c>,
/// optionally after centering, and a non-NaN result is passed through
/// <c>normalizeWeightResult</c>.
/// </remarks>
/// <param name="itemID1">First item.</param>
/// <param name="itemID2">Second item.</param>
/// <returns>
/// <c>Double.NaN</c> when either item has no preferences; otherwise the value produced by
/// <c>computeResult</c> and, unless NaN, <c>normalizeWeightResult</c>.
/// </returns>
public override double ItemSimilarity(long itemID1, long itemID2) {
    IDataModel model = getDataModel();
    IPreferenceArray prefsA = model.GetPreferencesForItem(itemID1);
    IPreferenceArray prefsB = model.GetPreferencesForItem(itemID2);
    int lengthA = prefsA.Length();
    int lengthB = prefsB.Length();
    if (lengthA == 0 || lengthB == 0) {
        return Double.NaN;
    }

    long userA = prefsA.GetUserID(0);
    long userB = prefsB.GetUserID(0);
    int posA = 0;
    int posB = 0;

    double sumX = 0.0;
    double sumX2 = 0.0;
    double sumY = 0.0;
    double sumY2 = 0.0;
    double sumXY = 0.0;
    double sumXYdiff2 = 0.0;
    int count = 0;

    // No, pref inferrers and transforms don't apply here. I think.
    while (true) {
        // Decide which cursor(s) move BEFORE either user ID is updated below.
        bool advanceA = userA <= userB;
        bool advanceB = userA >= userB;

        if (advanceA && advanceB) {
            // Both items have a preference from this user
            double x = prefsA.GetValue(posA);
            double y = prefsB.GetValue(posB);
            sumXY += x * y;
            sumX += x;
            sumX2 += x * x;
            sumY += y;
            sumY2 += y * y;
            double diff = x - y;
            sumXYdiff2 += diff * diff;
            count++;
        }

        if (advanceA) {
            if (++posA == lengthA) {
                break;
            }
            userA = prefsA.GetUserID(posA);
        }

        if (advanceB) {
            if (++posB == lengthB) {
                break;
            }
            userB = prefsB.GetUserID(posB);
        }
    }

    double result;
    if (!centerData) {
        result = computeResult(count, sumXY, sumX2, sumY2, sumXYdiff2);
    } else {
        double n = (double)count;
        double meanX = sumX / n;
        double meanY = sumY / n;
        // Each centered sum is the simplified form of the full expansion, e.g.
        // sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY, whose last two terms cancel.
        double centeredSumXY = sumXY - meanY * sumX;
        double centeredSumX2 = sumX2 - meanX * sumX;
        double centeredSumY2 = sumY2 - meanY * sumY;
        result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
    }

    if (Double.IsNaN(result)) {
        return result;
    }
    return normalizeWeightResult(result, count, cachedNumUsers);
}
/// <summary>
/// Computes the similarity of two users from their preference values.
/// </summary>
/// <remarks>
/// Both users' preference arrays are walked in step by item ID; the walk only pairs up
/// entries correctly if each array is ordered by ascending item ID (NOTE(review): not
/// checked here - confirm against the data model). Without an inferrer only items present
/// in both arrays are tallied. With an inferrer, an item present in only one array is
/// tallied too, using <c>inferrer.InferPreference</c> for the missing side; once one array
/// is exhausted its cursor is parked at <c>long.MaxValue</c> so the other array can be
/// drained. The sums are handed to <c>computeResult</c>, optionally after centering, and a
/// non-NaN result is passed through <c>normalizeWeightResult</c>.
/// </remarks>
/// <param name="userID1">First user.</param>
/// <param name="userID2">Second user.</param>
/// <returns>
/// <c>Double.NaN</c> when either user has no preferences; otherwise the value produced by
/// <c>computeResult</c> and, unless NaN, <c>normalizeWeightResult</c>.
/// </returns>
public double UserSimilarity(long userID1, long userID2) {
    IDataModel model = getDataModel();
    IPreferenceArray prefsA = model.GetPreferencesFromUser(userID1);
    IPreferenceArray prefsB = model.GetPreferencesFromUser(userID2);
    int lengthA = prefsA.Length();
    int lengthB = prefsB.Length();
    if (lengthA == 0 || lengthB == 0) {
        return Double.NaN;
    }

    long itemA = prefsA.GetItemID(0);
    long itemB = prefsB.GetItemID(0);
    int posA = 0;
    int posB = 0;

    double sumX = 0.0;
    double sumX2 = 0.0;
    double sumY = 0.0;
    double sumY2 = 0.0;
    double sumXY = 0.0;
    double sumXYdiff2 = 0.0;
    int count = 0;

    bool hasInferrer = inferrer != null;

    while (true) {
        // Relative order of the two cursors; fixed for the whole iteration.
        int order;
        if (itemA < itemB) {
            order = -1;
        } else if (itemA > itemB) {
            order = 1;
        } else {
            order = 0;
        }

        if (hasInferrer || order == 0) {
            double x;
            double y;
            if (order == 0) {
                // Both users expressed a preference for the item
                x = prefsA.GetValue(posA);
                y = prefsB.GetValue(posB);
            } else if (order < 0) {
                // Only the first user has a value; infer the second user's and tally it
                // as if it had been expressed.
                x = prefsA.GetValue(posA);
                y = inferrer.InferPreference(userID2, itemA);
            } else {
                // Only the second user has a value; infer the first user's.
                x = inferrer.InferPreference(userID1, itemB);
                y = prefsB.GetValue(posB);
            }
            sumXY += x * y;
            sumX += x;
            sumX2 += x * x;
            sumY += y;
            sumY2 += y * y;
            double diff = x - y;
            sumXYdiff2 += diff * diff;
            count++;
        }

        if (order <= 0) {
            posA++;
            if (posA < lengthA) {
                itemA = prefsA.GetItemID(posA);
            } else if (!hasInferrer || itemB == long.MaxValue) {
                // Without an inferrer nothing more can match; with one, stop once both are done.
                break;
            } else {
                // Must count the remaining entries of the second user; pretend the next
                // entry of the first user is far away.
                itemA = long.MaxValue;
            }
        }

        if (order >= 0) {
            posB++;
            if (posB < lengthB) {
                itemB = prefsB.GetItemID(posB);
            } else if (!hasInferrer || itemA == long.MaxValue) {
                // Without an inferrer nothing more can match; with one, stop once both are done.
                break;
            } else {
                // Must count the remaining entries of the first user; pretend the next
                // entry of the second user is far away.
                itemB = long.MaxValue;
            }
        }
    }

    double result;
    if (!centerData) {
        result = computeResult(count, sumXY, sumX2, sumY2, sumXYdiff2);
    } else {
        // "Center" the data. Each centered sum is the simplified form of the full expansion,
        // e.g. sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY, whose last two terms cancel.
        double meanX = sumX / count;
        double meanY = sumY / count;
        double centeredSumXY = sumXY - meanY * sumX;
        double centeredSumX2 = sumX2 - meanX * sumX;
        double centeredSumY2 = sumY2 - meanY * sumY;
        result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
    }

    if (Double.IsNaN(result)) {
        return result;
    }
    return normalizeWeightResult(result, count, cachedNumItems);
}
/// <summary>
/// Computes a rank-based (Spearman-style) similarity between two users: the preference
/// values of the items both users rated are replaced by their ranks, and the result is
/// <c>1 - 6 * sum(d^2) / (n * (n^2 - 1))</c>, where <c>d</c> is the per-item rank
/// difference and <c>n</c> the number of common items.
/// </summary>
/// <param name="userID1">First user.</param>
/// <param name="userID2">Second user.</param>
/// <returns>
/// The similarity, or <c>Double.NaN</c> when either user has at most one preference or the
/// users share at most one item.
/// </returns>
public double UserSimilarity(long userID1, long userID2) {
    IPreferenceArray xPrefs = dataModel.GetPreferencesFromUser(userID1);
    IPreferenceArray yPrefs = dataModel.GetPreferencesFromUser(userID2);
    int xLength = xPrefs.Length();
    int yLength = yPrefs.Length();

    if (xLength <= 1 || yLength <= 1) {
        return Double.NaN;
    }

    // Copy prefs since we need to modify pref values to ranks
    xPrefs = xPrefs.Clone();
    yPrefs = yPrefs.Clone();

    // First sort by values from low to high
    xPrefs.SortByValue();
    yPrefs.SortByValue();

    // Assign ranks from low to high
    float nextRank = 1.0f;
    for (int i = 0; i < xLength; i++) {
        // ... but only for items that are common to both pref arrays
        if (yPrefs.HasPrefWithItemID(xPrefs.GetItemID(i))) {
            xPrefs.SetValue(i, nextRank);
            nextRank += 1.0f;
        }
        // Other values are bogus but don't matter
    }
    nextRank = 1.0f;
    for (int i = 0; i < yLength; i++) {
        if (xPrefs.HasPrefWithItemID(yPrefs.GetItemID(i))) {
            yPrefs.SetValue(i, nextRank);
            nextRank += 1.0f;
        }
    }

    // Back to item order so the two arrays can be walked in step.
    xPrefs.SortByItem();
    yPrefs.SortByItem();

    long xIndex = xPrefs.GetItemID(0);
    long yIndex = yPrefs.GetItemID(0);
    int xPrefIndex = 0;
    int yPrefIndex = 0;

    double sumXYRankDiff2 = 0.0;
    int count = 0;

    while (true) {
        int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
        if (compare == 0) {
            // Common item: accumulate the squared rank difference.
            double diff = xPrefs.GetValue(xPrefIndex) - yPrefs.GetValue(yPrefIndex);
            sumXYRankDiff2 += diff * diff;
            count++;
        }
        if (compare <= 0) {
            if (++xPrefIndex >= xLength) {
                break;
            }
            xIndex = xPrefs.GetItemID(xPrefIndex);
        }
        if (compare >= 0) {
            if (++yPrefIndex >= yLength) {
                break;
            }
            yIndex = yPrefs.GetItemID(yPrefIndex);
        }
    }

    if (count <= 1) {
        return Double.NaN;
    }

    // When ranks are unique, this formula actually gives the Pearson correlation.
    // The denominator is evaluated in double precision: as an int expression
    // count * (count * count - 1) silently overflows once count exceeds 1290.
    double n = count;
    return 1.0 - 6.0 * sumXYRankDiff2 / (n * (n * n - 1.0));
}
/// <summary>
/// Produces a copy of <paramref name="delegatePrefs"/> that additionally contains one new
/// (user, value) entry. The new entry goes in front of the first existing entry whose user
/// ID is greater than or equal to <paramref name="newUserID"/>.
/// </summary>
/// <param name="delegatePrefs">Source preferences; a <c>null</c> reference counts as empty.</param>
/// <param name="itemID">Item ID stored in the resulting array.</param>
/// <param name="newUserID">User ID of the added entry.</param>
/// <param name="value">Preference value of the added entry.</param>
/// <returns>A new array with one more element than the source.</returns>
private static IPreferenceArray cloneAndMergeInto(IPreferenceArray delegatePrefs, long itemID, long newUserID, float value) {
    int sourceLength = 0;
    if (delegatePrefs != null) {
        sourceLength = delegatePrefs.Length();
    }
    int resultLength = sourceLength + 1;

    IPreferenceArray result = new GenericItemPreferenceArray(resultLength);
    // Set item ID once
    result.SetItemID(0, itemID);

    // Count the leading entries whose user ID is strictly below the new one.
    int slot = 0;
    while (slot < sourceLength && newUserID > delegatePrefs.GetUserID(slot)) {
        slot++;
    }

    // Fill the result front to back; positions after the slot read from one index earlier.
    for (int target = 0; target < resultLength; target++) {
        if (target == slot) {
            result.SetUserID(target, newUserID);
            result.SetValue(target, value);
        } else {
            int source = (target < slot) ? target : target - 1;
            result.SetUserID(target, delegatePrefs.GetUserID(source));
            result.SetValue(target, delegatePrefs.GetValue(source));
        }
    }

    return result;
}
/// <summary>
/// Randomly divides one user's preferences into a training part and a test part and
/// records them in the supplied maps.
/// </summary>
/// <remarks>
/// Each preference goes to the training part when a fresh <c>random.nextDouble()</c> draw is
/// below <paramref name="trainingPercentage"/>, otherwise to the test part. Nothing is
/// recorded for the user - not even test preferences - unless at least one preference
/// landed in the training part.
/// </remarks>
/// <param name="trainingPercentage">Threshold the random draw is compared against.</param>
/// <param name="trainingPrefs">Map receiving the user's training preferences.</param>
/// <param name="testPrefs">Map receiving the user's test preferences.</param>
/// <param name="userID">User whose preferences are split.</param>
/// <param name="dataModel">Data model to read the user's preferences from.</param>
private void splitOneUsersPrefs(double trainingPercentage, FastByIDMap<IPreferenceArray> trainingPrefs, FastByIDMap<IPreferenceArray> testPrefs, long userID, IDataModel dataModel) {
    // Both lists are created lazily so that an empty part stays null.
    List<IPreference> trainingPart = null;
    List<IPreference> testPart = null;

    IPreferenceArray allPrefs = dataModel.GetPreferencesFromUser(userID);
    int total = allPrefs.Length();
    for (int idx = 0; idx < total; idx++) {
        IPreference copy = new GenericPreference(userID, allPrefs.GetItemID(idx), allPrefs.GetValue(idx));
        bool toTraining = random.nextDouble() < trainingPercentage;
        if (toTraining) {
            if (trainingPart == null) {
                trainingPart = new List<IPreference>(3);
            }
            trainingPart.Add(copy);
        } else {
            if (testPart == null) {
                testPart = new List<IPreference>(3);
            }
            testPart.Add(copy);
        }
    }

    if (trainingPart == null) {
        return;
    }

    trainingPrefs.Put(userID, new GenericUserPreferenceArray(trainingPart));
    if (testPart != null) {
        testPrefs.Put(userID, new GenericUserPreferenceArray(testPart));
    }
}