        /// TODO: this is the vanilla SGD of Takács 2009. I speculate that the scaling technique proposed in
        /// "Towards Optimal One Pass Large Scale Learning with Averaged Stochastic Gradient Descent", section 5, page 6,
        /// could be beneficial in terms of both speed and accuracy.
        ///
        /// Takács' method does not compute the gradient of the regularization term correctly: that gradient has
        /// non-zero elements everywhere in the matrix, whereas Takács' method only updates a single row/column per
        /// step. So if one user has many ratings, her vector is affected by regularization more strongly. Using an
        /// isolated scaling factor for the user vectors and the item vectors would remove this issue without adding
        /// update cost; it would even reduce the cost a bit, to one extra addition and one multiplication.
        ///
        /// BAD SIDE 1: the scaling factor decreases quickly, so it has to be scaled up from time to time before it
        ///             drops to zero or causes round-off error.
        /// BAD SIDE 2: nobody has experimented with this before, and people generally use a very small lambda,
        ///             so its impact on accuracy is still unknown.
        /// BAD SIDE 3: it is unclear how to make it work for L1 regularization or for
        ///             "pseudo-rank" (sum of singular values) regularization.
        protected void update(IPreference preference, double mu)
        {
            int userIdx = userIndex(preference.GetUserID());
            int itemIdx = itemIndex(preference.GetItemID());

            double[] userVector = userVectors[userIdx];
            double[] itemVector = itemVectors[itemIdx];

            double prediction = dot(userVector, itemVector);
            double err        = preference.GetValue() - prediction;

            // adjust features
            for (int k = FEATURE_OFFSET; k < rank; k++)
            {
                double userFeature = userVector[k];
                double itemFeature = itemVector[k];

                userVector[k] += mu * (err * itemFeature - lambda * userFeature);
                itemVector[k] += mu * (err * userFeature - lambda * itemFeature);
            }

            // adjust user and item bias
            userVector[USER_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * userVector[USER_BIAS_INDEX]);
            itemVector[ITEM_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * itemVector[ITEM_BIAS_INDEX]);
        }
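As an illustration of the TODO above, here is a minimal sketch of the isolated scaling-factor idea; it is not part of the original factorizer. The fields userScale and itemScale, the method name updateWithScaling, the fold helper, and the rescale threshold are all assumptions. The stored matrices are kept un-shrunk and the "true" values are userScale * userVectors[u][k] and itemScale * itemVectors[i][k], so the L2 shrinkage (1 - mu * lambda) reaches every row through a single scalar per matrix instead of only the rows touched by the current observation. Bias terms are left out for brevity.

        // Hypothetical extra fields: one global shrinkage factor per matrix.
        private double userScale = 1.0;
        private double itemScale = 1.0;

        protected void updateWithScaling(IPreference preference, double mu)
        {
            double[] userVector = userVectors[userIndex(preference.GetUserID())];
            double[] itemVector = itemVectors[itemIndex(preference.GetItemID())];

            // predict with the "true" (scaled) latent features; biases are omitted in this sketch
            double prediction = 0.0;
            for (int k = FEATURE_OFFSET; k < rank; k++)
            {
                prediction += (userScale * userVector[k]) * (itemScale * itemVector[k]);
            }
            double err = preference.GetValue() - prediction;

            // one multiplication per matrix regularizes all rows at once
            double oldUserScale = userScale;
            double oldItemScale = itemScale;
            userScale *= 1.0 - mu * lambda;
            itemScale *= 1.0 - mu * lambda;

            for (int k = FEATURE_OFFSET; k < rank; k++)
            {
                // "true" feature values before this update
                double userFeature = oldUserScale * userVector[k];
                double itemFeature = oldItemScale * itemVector[k];

                // dividing the gradient step by the new scale keeps scale * stored value equal to
                // the vanilla update (1 - mu * lambda) * oldValue + mu * err * otherFeature
                userVector[k] += mu * err * itemFeature / userScale;
                itemVector[k] += mu * err * userFeature / itemScale;
            }

            // BAD SIDE 1: the scales shrink towards zero, so fold them back into the stored
            // vectors once they get small, to avoid round-off error (threshold chosen arbitrarily)
            if (userScale < 1e-9) { userScale = fold(userVectors, userScale); }
            if (itemScale < 1e-9) { itemScale = fold(itemVectors, itemScale); }
        }

        // Multiply the stored latent features by the accumulated scale and reset the scale to 1.0.
        private double fold(double[][] vectors, double scale)
        {
            foreach (double[] v in vectors)
            {
                for (int k = FEATURE_OFFSET; k < v.Length; k++)
                {
                    v[k] *= scale;
                }
            }
            return 1.0;
        }

Only the two touched rows' stored values change per observation, so the per-update cost matches the vanilla method; the whole-matrix shrinkage costs just two scalar multiplications plus the occasional fold.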
Example No. 2
        public void testPreferenceShufflerWithSyntheticData()
        {
            setUpSyntheticData();

            ParallelSGDFactorizer.PreferenceShuffler shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
            shuffler.shuffle();
            shuffler.stage();

            FastByIDMap<FastByIDMap<bool?>> checkedLst = new FastByIDMap<FastByIDMap<bool?>>();

            for (int i = 0; i < shuffler.size(); i++)
            {
                IPreference pref = shuffler.get(i);

                float? value = dataModel.GetPreferenceValue(pref.GetUserID(), pref.GetItemID());
                Assert.AreEqual(pref.GetValue(), value.Value, 0.0);
                if (!checkedLst.ContainsKey(pref.GetUserID()))
                {
                    checkedLst.Put(pref.GetUserID(), new FastByIDMap <bool?>());
                }

                Assert.IsNull(checkedLst.Get(pref.GetUserID()).Get(pref.GetItemID()));

                checkedLst.Get(pref.GetUserID()).Put(pref.GetItemID(), true);
            }

            var userIDs = dataModel.GetUserIDs();
            int index   = 0;

            while (userIDs.MoveNext())
            {
                long             userID = userIDs.Current;
                IPreferenceArray preferencesFromUser = dataModel.GetPreferencesFromUser(userID);
                foreach (IPreference preference in preferencesFromUser)
                {
                    Assert.True(checkedLst.Get(preference.GetUserID()).Get(preference.GetItemID()).Value);
                    index++;
                }
            }
            Assert.AreEqual(index, shuffler.size());
        }
Example No. 3
        public GenericItemPreferenceArray(IList <IPreference> prefs) : this(prefs.Count)
        {
            int  size   = prefs.Count;
            long itemID = Int64.MinValue;

            for (int i = 0; i < size; i++)
            {
                IPreference pref = prefs[i];
                ids[i] = pref.GetUserID();
                if (i == 0)
                {
                    itemID = pref.GetItemID();
                }
                else
                {
                    if (itemID != pref.GetItemID())
                    {
                        throw new ArgumentException("Not all item IDs are the same");
                    }
                }
                values[i] = pref.GetValue();
            }
            id = itemID;
        }
Example No. 4
        protected override void processOneEstimate(float estimatedPreference, IPreference realPref)
        {
            // accumulate the squared difference between the real and the estimated preference
            double diff = realPref.GetValue() - estimatedPreference;

            average.AddDatum(diff * diff);
        }
Example No. 6
 public void Set(int i, IPreference pref)
 {
     id        = pref.GetItemID();
     ids[i]    = pref.GetUserID();
     values[i] = pref.GetValue();
 }
 protected override void processOneEstimate(float estimatedPreference, IPreference realPref)
 {
     // accumulate the absolute difference between the real and the estimated preference
     average.AddDatum(Math.Abs(realPref.GetValue() - estimatedPreference));
 }