/// <summary>
/// Applies one stochastic-gradient step for a single observed preference, adjusting the
/// corresponding user vector and item vector in place.
/// </summary>
/// <remarks>
/// <para>
/// TODO: this is the vanilla SGD of Takacs (2009). The scaling technique proposed in section 5
/// (page 6) of "Towards Optimal One Pass Large Scale Learning with Averaged Stochastic Gradient
/// Descent" may be beneficial in terms of both speed and accuracy.
/// </para>
/// <para>
/// Takacs' method does not calculate the gradient of the regularization term correctly: that
/// gradient has non-zero elements everywhere in the matrix, while each step here only updates a
/// single row/column. As a result, a user with many recommendations has her vector affected more
/// strongly by regularization. Using an isolated scaling factor for both the user vectors and the
/// item vectors can remove this issue without adding update cost; it even reduces the cost a bit,
/// since only one addition and one multiplication are performed.
/// </para>
/// <para>
/// Bad side 1: the scaling factor decreases fast, so it has to be scaled back up from time to time
/// before it drops to zero or causes round-off error.
/// Bad side 2: nobody has experimented with it before, and people generally use a very small
/// lambda, so its impact on accuracy is still unknown.
/// Bad side 3: it is unclear how to make it work for L1 regularization or for
/// "pseudorank?" (sum of singular values) regularization.
/// </para>
/// </remarks>
/// <param name="preference">Observed preference supplying the user ID, item ID and target value.</param>
/// <param name="mu">Step size multiplied into every gradient term of this update.</param>
protected void update(IPreference preference, double mu)
{
    int userPos = userIndex(preference.GetUserID());
    int itemPos = itemIndex(preference.GetItemID());

    double[] userFactors = userVectors[userPos];
    double[] itemFactors = itemVectors[itemPos];

    // Residual between the observed value and the current model prediction.
    double predicted = dot(userFactors, itemFactors);
    double err = preference.GetValue() - predicted;

    // Latent features: both pre-update values are captured first so that the item update
    // does not see the freshly written user feature.
    for (int f = FEATURE_OFFSET; f < rank; f++)
    {
        double userWas = userFactors[f];
        double itemWas = itemFactors[f];

        userFactors[f] += mu * (err * itemWas - lambda * userWas);
        itemFactors[f] += mu * (err * userWas - lambda * itemWas);
    }

    // Bias slots use their own step-size and regularization ratios.
    userFactors[USER_BIAS_INDEX] +=
        biasMuRatio * mu * (err - biasLambdaRatio * lambda * userFactors[USER_BIAS_INDEX]);
    itemFactors[ITEM_BIAS_INDEX] +=
        biasMuRatio * mu * (err - biasLambdaRatio * lambda * itemFactors[ITEM_BIAS_INDEX]);
}
/// <summary>
/// Verifies that, after shuffling and staging, the preference shuffler exposes every preference of
/// the synthetic data model exactly once and with the value stored in the data model.
/// </summary>
public void testPreferenceShufflerWithSyntheticData()
{
    setUpSyntheticData();

    ParallelSGDFactorizer.PreferenceShuffler shuffler =
        new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
    shuffler.shuffle();
    shuffler.stage();

    // userID -> (itemID -> "already seen" flag) for everything the shuffler hands back.
    FastByIDMap<FastByIDMap<bool?>> seen = new FastByIDMap<FastByIDMap<bool?>>();

    for (int pos = 0; pos < shuffler.size(); pos++)
    {
        IPreference shuffled = shuffler.get(pos);

        // The shuffled preference must carry the value stored in the data model.
        float? stored = dataModel.GetPreferenceValue(shuffled.GetUserID(), shuffled.GetItemID());
        Assert.AreEqual(shuffled.GetValue(), stored.Value, 0.0);

        if (!seen.ContainsKey(shuffled.GetUserID()))
        {
            seen.Put(shuffled.GetUserID(), new FastByIDMap<bool?>());
        }

        // A (user, item) pair must not have been produced earlier.
        FastByIDMap<bool?> seenForUser = seen.Get(shuffled.GetUserID());
        Assert.IsNull(seenForUser.Get(shuffled.GetItemID()));
        seenForUser.Put(shuffled.GetItemID(), true);
    }

    // Every preference in the data model must have been produced by the shuffler.
    var userIDs = dataModel.GetUserIDs();
    int visited = 0;
    while (userIDs.MoveNext())
    {
        long userID = userIDs.Current;
        IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
        foreach (IPreference modelPref in userPrefs)
        {
            bool? flag = seen.Get(modelPref.GetUserID()).Get(modelPref.GetItemID());
            Assert.True(flag.Value);
            visited++;
        }
    }

    Assert.AreEqual(visited, shuffler.size());
}
/// <summary>
/// Builds the array from a list of preferences that must all refer to the same item.
/// </summary>
/// <remarks>
/// Delegates to the size-taking constructor with <c>prefs.Count</c>, then copies each preference's
/// user ID and value. For an empty list the item ID is left at <see cref="Int64.MinValue"/>.
/// </remarks>
/// <param name="prefs">Preferences to copy; every element must report the same item ID.</param>
/// <exception cref="ArgumentException">
/// Thrown when a preference reports an item ID different from the first one. Entries processed
/// before the mismatch have already been written at that point.
/// </exception>
public GenericItemPreferenceArray(IList<IPreference> prefs)
    : this(prefs.Count)
{
    long sharedItemID = Int64.MinValue;
    int count = prefs.Count;

    for (int pos = 0; pos < count; pos++)
    {
        IPreference current = prefs[pos];
        ids[pos] = current.GetUserID();

        if (pos == 0)
        {
            // The first preference defines the item ID for the whole array.
            sharedItemID = current.GetItemID();
        }
        else if (sharedItemID != current.GetItemID())
        {
            throw new ArgumentException("Not all item IDs are the same");
        }

        values[pos] = current.GetValue();
    }

    id = sharedItemID;
}
/// <summary>
/// Records the squared difference between the real and the estimated preference value.
/// </summary>
/// <param name="estimatedPreference">Value predicted for the preference.</param>
/// <param name="realPref">Preference holding the actual value.</param>
protected override void processOneEstimate(float estimatedPreference, IPreference realPref)
{
    double residual = realPref.GetValue() - estimatedPreference;
    double squaredError = residual * residual;
    average.AddDatum(squaredError);
}
/// <summary>
/// Copies a preference into slot <paramref name="i"/>.
/// </summary>
/// <remarks>
/// The array-wide item ID is overwritten with the preference's item ID on every call; the user ID
/// and value are stored at index <paramref name="i"/>.
/// </remarks>
/// <param name="i">Index of the slot to overwrite.</param>
/// <param name="pref">Preference whose item ID, user ID and value are copied.</param>
public void Set(int i, IPreference pref)
{
    // Item ID first, matching the original write order.
    var itemID = pref.GetItemID();
    id = itemID;

    var userID = pref.GetUserID();
    ids[i] = userID;

    var rating = pref.GetValue();
    values[i] = rating;
}
/// <summary>
/// Records the absolute difference between the real and the estimated preference value.
/// </summary>
/// <param name="estimatedPreference">Value predicted for the preference.</param>
/// <param name="realPref">Preference holding the actual value.</param>
protected override void processOneEstimate(float estimatedPreference, IPreference realPref)
{
    var difference = realPref.GetValue() - estimatedPreference;
    var absoluteError = Math.Abs(difference);
    average.AddDatum(absoluteError);
}