/// <summary>
        /// Applies one stochastic-gradient step for a single observed preference. The prediction error is
        /// computed from the current user and item vectors, every latent feature from <c>FEATURE_OFFSET</c>
        /// up to <c>rank</c> is moved along its regularized gradient, and finally the user and item bias
        /// entries are adjusted using their own step-size and regularization ratios.
        /// </summary>
        /// <remarks>
        /// TODO: this is the vanilla SGD of Takács (2009). The scaling technique proposed in "Towards Optimal
        /// One Pass Large Scale Learning with Averaged Stochastic Gradient Descent" (section 5, page 6) might
        /// be beneficial in terms of both speed and accuracy.
        ///
        /// Takács' method does not calculate the gradient of the regularization term correctly: that gradient
        /// has non-zero elements everywhere in the matrix, whereas this method only updates a single
        /// row/column. Consequently, if one user has a lot of recommendations, her vector is affected more
        /// strongly by regularization. Using an isolated scaling factor for the user vectors and for the item
        /// vectors could remove this issue without adding update cost; it even reduces the cost a bit because
        /// only one addition and one multiplication are performed.
        ///
        /// Drawback 1: the scaling factor decreases fast, so it has to be scaled up from time to time before
        ///             it drops to zero or causes round-off error.
        /// Drawback 2: nobody has experimented with it before, and people generally use a very small lambda,
        ///             so its impact on accuracy is still unknown.
        /// Drawback 3: it is unclear how to make it work for L1 regularization or for
        ///             "pseudorank?" (sum of singular values) regularization.
        /// </remarks>
        /// <param name="preference">Observed preference whose user, item and value drive this step.</param>
        /// <param name="mu">Step size multiplied into every feature update (and, scaled, into the bias updates).</param>
        protected void update(IPreference preference, double mu)
        {
            int userRow = userIndex(preference.GetUserID());
            int itemRow = itemIndex(preference.GetItemID());

            double[] userFactors = userVectors[userRow];
            double[] itemFactors = itemVectors[itemRow];

            // Residual between the observed value and what the current vectors predict.
            double estimate = dot(userFactors, itemFactors);
            double err      = preference.GetValue() - estimate;

            // Latent features: both gradients are built from the values read *before* either side is modified.
            for (int feature = FEATURE_OFFSET; feature < rank; feature++)
            {
                double userWeight = userFactors[feature];
                double itemWeight = itemFactors[feature];

                double userGradient = err * itemWeight - lambda * userWeight;
                double itemGradient = err * userWeight - lambda * itemWeight;

                userFactors[feature] += mu * userGradient;
                itemFactors[feature] += mu * itemGradient;
            }

            // Bias terms share one scaled step size and one scaled regularization weight.
            double biasStep    = biasMuRatio * mu;
            double biasPenalty = biasLambdaRatio * lambda;

            userFactors[USER_BIAS_INDEX] += biasStep * (err - biasPenalty * userFactors[USER_BIAS_INDEX]);
            itemFactors[ITEM_BIAS_INDEX] += biasStep * (err - biasPenalty * itemFactors[ITEM_BIAS_INDEX]);
        }
Beispiel #2
0
        /// <summary>
        /// Shuffles the synthetic data set and verifies that every preference handed out by the shuffler
        /// carries the value stored in the data model, that no (user, item) pair is handed out twice, and
        /// that every preference of every user in the data model was handed out.
        /// </summary>
        public void testPreferenceShufflerWithSyntheticData()
        {
            setUpSyntheticData();

            var shuffler = new ParallelSGDFactorizer.PreferenceShuffler(dataModel);
            shuffler.shuffle();
            shuffler.stage();

            // userID -> (itemID -> true) for each preference the shuffler has produced so far.
            var seenByUser = new FastByIDMap<FastByIDMap<bool?>>();

            for (int position = 0; position < shuffler.size(); position++)
            {
                IPreference shuffled = shuffler.get(position);
                var shuffledUserID = shuffled.GetUserID();
                var shuffledItemID = shuffled.GetItemID();

                // The shuffled copy must carry the value the data model stores for the same pair.
                float? stored = dataModel.GetPreferenceValue(shuffledUserID, shuffledItemID);
                Assert.AreEqual(shuffled.GetValue(), stored.Value, 0.0);

                if (!seenByUser.ContainsKey(shuffledUserID))
                {
                    seenByUser.Put(shuffledUserID, new FastByIDMap<bool?>());
                }

                // A non-null entry here would mean the same pair was produced twice.
                FastByIDMap<bool?> seenItems = seenByUser.Get(shuffledUserID);
                Assert.IsNull(seenItems.Get(shuffledItemID));
                seenItems.Put(shuffledItemID, true);
            }

            var userIDs = dataModel.GetUserIDs();
            int visited = 0;

            while (userIDs.MoveNext())
            {
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userIDs.Current);
                foreach (IPreference expected in userPrefs)
                {
                    bool? wasSeen = seenByUser.Get(expected.GetUserID()).Get(expected.GetItemID());
                    Assert.True(wasSeen.Value);
                    visited++;
                }
            }

            // Together with the uniqueness check above, equal counts mean nothing was dropped or invented.
            Assert.AreEqual(visited, shuffler.size());
        }
        /// <summary>
        /// Builds the array from a list of preferences: the item ID of every preference is copied into
        /// <c>ids</c> in list order, and the user ID of the first preference becomes <c>id</c>.
        /// An empty list leaves <c>id</c> untouched.
        /// </summary>
        /// <param name="prefs">Preferences to copy; its count sizes the array via the chained constructor.</param>
        public BooleanUserPreferenceArray(List<IPreference> prefs) : this(prefs.Count)
        {
            if (prefs.Count == 0)
            {
                return;
            }

            int slot = 0;
            foreach (IPreference pref in prefs)
            {
                ids[slot] = pref.GetItemID();
                slot++;
            }

            // Only the first preference's user ID is consulted.
            id = prefs[0].GetUserID();
        }
Beispiel #4
0
        /// <summary>
        /// Builds the array from a list of preferences. With <paramref name="forOneUser"/> set, the item IDs
        /// fill <c>ids</c> and the first preference's user ID becomes <c>id</c>; otherwise the user IDs fill
        /// <c>ids</c> and the first preference's item ID becomes <c>id</c>. An empty list leaves <c>id</c>
        /// untouched.
        /// </summary>
        /// <param name="prefs">Preferences to copy; its count sizes the array via the chained constructor.</param>
        /// <param name="forOneUser">Selects which of the two IDs is stored per entry and which is shared.</param>
        public BooleanItemPreferenceArray(List<IPreference> prefs, bool forOneUser) : this(prefs.Count)
        {
            int count = prefs.Count;
            if (count == 0)
            {
                return;
            }

            if (forOneUser)
            {
                for (int slot = 0; slot < count; slot++)
                {
                    ids[slot] = prefs[slot].GetItemID();
                }
                id = prefs[0].GetUserID();
            }
            else
            {
                for (int slot = 0; slot < count; slot++)
                {
                    ids[slot] = prefs[slot].GetUserID();
                }
                id = prefs[0].GetItemID();
            }
        }
        /// <summary>
        /// Fetches the preferences for item 456 and checks the user/item IDs of the first two entries as
        /// well as the total number of entries.
        /// </summary>
        public void testPreferencesForItem()
        {
            IPreferenceArray itemPrefs = model.GetPreferencesForItem(456);
            Assert.NotNull(itemPrefs);

            // Entry 0 is expected to be user 123 on item 456.
            IPreference first = itemPrefs.Get(0);
            Assert.AreEqual(123, first.GetUserID());
            Assert.AreEqual(456, first.GetItemID());

            // Entry 1 is expected to be user 456 on item 456.
            IPreference second = itemPrefs.Get(1);
            Assert.AreEqual(456, second.GetUserID());
            Assert.AreEqual(456, second.GetItemID());

            // Exactly those two entries, nothing more.
            Assert.AreEqual(2, itemPrefs.Length());
        }
Beispiel #6
0
        /// <summary>
        /// Builds the array from a list of preferences that must all refer to the same item. User IDs go to
        /// <c>ids</c>, values go to <c>values</c>, and the common item ID becomes <c>id</c>. For an empty
        /// list <c>id</c> is set to <c>Int64.MinValue</c>.
        /// </summary>
        /// <param name="prefs">Preferences to copy; its count sizes the array via the chained constructor.</param>
        /// <exception cref="ArgumentException">A preference's item ID differs from the first one's.</exception>
        public GenericItemPreferenceArray(IList<IPreference> prefs) : this(prefs.Count)
        {
            // Sentinel that survives only when the list is empty.
            long sharedItemID = Int64.MinValue;
            int  count        = prefs.Count;

            for (int slot = 0; slot < count; slot++)
            {
                IPreference current = prefs[slot];
                ids[slot] = current.GetUserID();

                if (slot == 0)
                {
                    // The first preference defines the item every later one must match.
                    sharedItemID = current.GetItemID();
                }
                else if (sharedItemID != current.GetItemID())
                {
                    throw new ArgumentException("Not all item IDs are the same");
                }

                values[slot] = current.GetValue();
            }

            id = sharedItemID;
        }
 /// <summary>
 /// Overwrites slot <paramref name="i"/> with the user ID and value of <paramref name="pref"/>, and
 /// replaces the array-wide <c>id</c> with that preference's item ID.
 /// </summary>
 /// <param name="i">Index of the slot to overwrite in <c>ids</c> and <c>values</c>.</param>
 /// <param name="pref">Preference supplying the item ID, user ID and value.</param>
 public void Set(int i, IPreference pref) {
   // The shared item ID is replaced first, before either per-slot array is touched.
   id = pref.GetItemID();

   var userID = pref.GetUserID();
   ids[i] = userID;

   var value = pref.GetValue();
   values[i] = value;
 }
Beispiel #8
0
 /// <summary>
 /// Stores <paramref name="pref"/> at position <paramref name="i"/>: its item ID becomes the
 /// array-wide <c>id</c>, its user ID goes to <c>ids[i]</c> and its value to <c>values[i]</c>.
 /// </summary>
 /// <param name="i">Position in <c>ids</c> and <c>values</c> to write.</param>
 /// <param name="pref">Source of the item ID, user ID and value.</param>
 public void Set(int i, IPreference pref)
 {
     // Shared item ID is assigned before the per-position writes, as in the original statement order.
     id = pref.GetItemID();

     var sourceUserID = pref.GetUserID();
     ids[i] = sourceUserID;

     var sourceValue = pref.GetValue();
     values[i] = sourceValue;
 }
  /// <summary>
  /// Performs a single stochastic-gradient update for one observed preference: computes the prediction
  /// error from the current user and item vectors, moves each latent feature in
  /// [<c>FEATURE_OFFSET</c>, <c>rank</c>) along its regularized gradient, then adjusts the user and item
  /// bias entries with their own step-size and regularization ratios.
  /// </summary>
  /// <remarks>
  /// TODO: this is the vanilla SGD of Takács (2009). The scaling technique proposed in "Towards Optimal
  /// One Pass Large Scale Learning with Averaged Stochastic Gradient Descent" (section 5, page 6) might
  /// be beneficial in terms of both speed and accuracy.
  ///
  /// Takács' method does not calculate the gradient of the regularization term correctly: that gradient
  /// has non-zero elements everywhere in the matrix, whereas this method only updates a single
  /// row/column. Consequently, if one user has a lot of recommendations, her vector is affected more
  /// strongly by regularization. Using an isolated scaling factor for the user vectors and for the item
  /// vectors could remove this issue without adding update cost; it even reduces the cost a bit because
  /// only one addition and one multiplication are performed.
  ///
  /// Drawback 1: the scaling factor decreases fast, so it has to be scaled up from time to time before
  ///             it drops to zero or causes round-off error.
  /// Drawback 2: nobody has experimented with it before, and people generally use a very small lambda,
  ///             so its impact on accuracy is still unknown.
  /// Drawback 3: it is unclear how to make it work for L1 regularization or for
  ///             "pseudorank?" (sum of singular values) regularization.
  /// </remarks>
  /// <param name="preference">Observed preference whose user, item and value drive this step.</param>
  /// <param name="mu">Step size multiplied into every feature update (and, scaled, into the bias updates).</param>
  protected void update(IPreference preference, double mu) {
    int userPos = userIndex(preference.GetUserID());
    int itemPos = itemIndex(preference.GetItemID());

    double[] userRow = userVectors[userPos];
    double[] itemRow = itemVectors[itemPos];

    // Residual between the observed value and the current model's prediction.
    double predicted = dot(userRow, itemRow);
    double err = preference.GetValue() - predicted;

    // Latent features: both deltas are derived from the pre-update values of this feature.
    for (int f = FEATURE_OFFSET; f < rank; f++) {
      double oldUser = userRow[f];
      double oldItem = itemRow[f];

      double userDelta = mu * (err * oldItem - lambda * oldUser);
      double itemDelta = mu * (err * oldUser - lambda * oldItem);

      userRow[f] += userDelta;
      itemRow[f] += itemDelta;
    }

    // Biases: step size and regularization are each scaled by their dedicated ratio.
    double userBias = userRow[USER_BIAS_INDEX];
    userRow[USER_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * userBias);

    double itemBias = itemRow[ITEM_BIAS_INDEX];
    itemRow[ITEM_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * itemBias);
  }
        /// <summary>
        /// Reads one line from the input file and adds the data to a <c>FastByIDMap</c> data structure which maps
        /// user IDs to preferences. This assumes that each line of the input file corresponds to one preference.
        /// After reading a line and determining which user and item the preference pertains to, the method looks
        /// to see if the data contains a mapping for the user ID already, and if not, adds an empty data structure
        /// of preferences as appropriate to the data.
        /// </summary>
        /// <remarks>
        /// <para>
        /// If the line is empty or begins with <c>COMMENT_CHAR</c> (documented as '#') it is ignored as a comment.
        /// </para>
        /// <para>
        /// A line with no timestamp token and a blank preference value (form "userID,itemID,") is a removal
        /// request for that user/item pair; any other line adds a preference, or updates the existing one when
        /// <c>uniqueUserItemCheck</c> is set. When <c>transpose</c> is set, the user and item IDs read from the
        /// line are swapped before anything else happens.
        /// </para>
        /// </remarks>
        /// <typeparam name="T">
        /// Storage type held per user in <paramref name="data"/>; it is cast to <c>IPreferenceArray</c> or to an
        /// enumerable of <c>IPreference</c> depending on <paramref name="fromPriorData"/>.
        /// </typeparam>
        /// <param name="line">line from input data file</param>
        /// <param name="data">all data read so far, as a mapping from user IDs to preferences</param>
        /// <param name="timestamps">
        /// per-user, per-item timestamp map that is handed to <c>removeTimestamp</c> and <c>addTimestamp</c>
        /// </param>
        /// <param name="fromPriorData">
        /// an implementation detail -- if true, data will map IDs to <c>IPreferenceArray</c> since the framework
        /// is attempting to read and update raw data that is already in memory. Otherwise it maps to collections
        /// of <c>IPreference</c>, since it's reading fresh data. Subclasses must be prepared to handle this
        /// wrinkle.
        /// </param>
        protected void processLine<T>(string line,
                                      FastByIDMap<T> data,
                                      FastByIDMap<FastByIDMap<DateTime?>> timestamps,
                                      bool fromPriorData)
        {
            // Ignore empty lines and comments
            if (line.Length == 0 || line[0] == COMMENT_CHAR)
            {
                return;
            }

            var    tokens                = SplitLine(line);
            string userIDString          = tokens[0];
            string itemIDString          = tokens[1];
            string preferenceValueString = tokens[2];
            bool   hasTimestamp          = tokens.Length > 3;
            string timestampString       = hasTimestamp ? tokens[3] : null;

            long userID = readUserIDFromString(userIDString);
            long itemID = readItemIDFromString(itemIDString);

            if (transpose)
            {
                long tmp = userID;
                userID = itemID;
                itemID = tmp;
            }

            // A blank value without a timestamp marks the line as "userID,itemID,", meaning remove.
            bool isRemoval = !hasTimestamp && String.IsNullOrWhiteSpace(preferenceValueString);

            // This is kind of gross but need to handle two types of storage
            var maybePrefs = data.Get(userID);

            if (fromPriorData)
            {
                // Data are PreferenceArray

                IPreferenceArray prefs = (IPreferenceArray)maybePrefs;
                if (isRemoval)
                {
                    if (prefs != null)
                    {
                        int length = prefs.Length();

                        // Count every entry for this item. The item can occur more than once (for example when
                        // uniqueUserItemCheck is off), and the replacement array must be sized for exactly the
                        // surviving entries so that no default-initialised slot is left behind.
                        int matches = 0;
                        for (int i = 0; i < length; i++)
                        {
                            if (prefs.GetItemID(i) == itemID)
                            {
                                matches++;
                            }
                        }

                        if (matches > 0)
                        {
                            int remaining = length - matches;
                            if (remaining == 0)
                            {
                                // Nothing left for this user: drop the mapping entirely.
                                data.Remove(userID);
                            }
                            else
                            {
                                IPreferenceArray newPrefs = new GenericUserPreferenceArray(remaining);
                                int target = 0;
                                for (int i = 0; i < length; i++)
                                {
                                    if (prefs.GetItemID(i) != itemID)
                                    {
                                        newPrefs.Set(target, prefs.Get(i));
                                        target++;
                                    }
                                }
                                data.Put(userID, (T)newPrefs);
                            }
                        }
                    }

                    removeTimestamp(userID, itemID, timestamps);
                }
                else
                {
                    float preferenceValue = float.Parse(preferenceValueString, CultureInfo.InvariantCulture);

                    bool exists = false;
                    if (uniqueUserItemCheck && prefs != null)
                    {
                        for (int i = 0; i < prefs.Length(); i++)
                        {
                            if (prefs.GetItemID(i) == itemID)
                            {
                                exists = true;
                                prefs.SetValue(i, preferenceValue);
                                break;
                            }
                        }
                    }

                    if (!exists)
                    {
                        if (prefs == null)
                        {
                            prefs = new GenericUserPreferenceArray(1);
                        }
                        else
                        {
                            // Grow by one; existing entries shift up so that slot 0 is free for the new one.
                            IPreferenceArray newPrefs = new GenericUserPreferenceArray(prefs.Length() + 1);
                            for (int i = 0, j = 1; i < prefs.Length(); i++, j++)
                            {
                                newPrefs.Set(j, prefs.Get(i));
                            }
                            prefs = newPrefs;
                        }
                        prefs.SetUserID(0, userID);
                        prefs.SetItemID(0, itemID);
                        prefs.SetValue(0, preferenceValue);
                        data.Put(userID, (T)prefs);
                    }
                }

                // Runs after both paths above; on the removal path timestampString is always null because
                // that path requires a line without a timestamp token.
                addTimestamp(userID, itemID, timestampString, timestamps);
            }
            else
            {
                // Data are IEnumerable<Preference>

                IEnumerable<IPreference> prefs = ((IEnumerable<IPreference>)maybePrefs);

                if (isRemoval)
                {
                    // Only list-backed storage supports removal; any other collection type is left untouched.
                    IList<IPreference> removable = prefs as IList<IPreference>;
                    if (removable != null)
                    {
                        // Remove the first preference for this item in a single pass, by position.
                        for (int i = 0; i < removable.Count; i++)
                        {
                            if (removable[i].GetItemID() == itemID)
                            {
                                removable.RemoveAt(i);
                                break;
                            }
                        }
                    }

                    removeTimestamp(userID, itemID, timestamps);
                }
                else
                {
                    float preferenceValue = float.Parse(preferenceValueString, CultureInfo.InvariantCulture);

                    bool exists = false;
                    if (uniqueUserItemCheck && prefs != null)
                    {
                        foreach (IPreference pref in prefs)
                        {
                            if (pref.GetItemID() == itemID)
                            {
                                exists = true;
                                pref.SetValue(preferenceValue);
                                break;
                            }
                        }
                    }

                    if (!exists)
                    {
                        if (prefs == null)
                        {
                            prefs = new List<IPreference>(5);
                            data.Put(userID, (T)prefs);
                        }

                        // As with removal, only list-backed storage can take the new preference.
                        IList<IPreference> appendable = prefs as IList<IPreference>;
                        if (appendable != null)
                        {
                            appendable.Add(new GenericPreference(userID, itemID, preferenceValue));
                        }
                    }

                    addTimestamp(userID, itemID, timestampString, timestamps);
                }
            }
        }