/// <summary>
/// Runs the factorizer, stores its result in <c>factorization</c>, and then offers that
/// result to the persistence strategy.
/// </summary>
/// <exception cref="TasteException">
/// Thrown when the persistence strategy fails with an <see cref="IOException"/>; the
/// original exception is passed along as the inner exception.
/// </exception>
private void train()
{
    factorization = factorizer.Factorize();

    try
    {
        persistenceStrategy.MaybePersist(factorization);
    }
    catch (IOException persistFailure)
    {
        // Surface I/O problems as the recommender's own exception type.
        throw new TasteException("Error persisting factorization", persistFailure);
    }
}
Example #2
0
        /// <summary>
        /// Factorizes the synthetic data set with <c>ParallelSGDFactorizer</c> and checks that
        /// the RMSE between the stored preferences and the reconstructed ones stays below 0.2.
        /// The regularized loss and the elapsed time are only logged, not asserted.
        /// </summary>
        public void testFactorizerWithWithSyntheticData()
        {
            // NOTE(review): presumably initializes dataModel, rank, lambda and numIterations — confirm.
            setUpSyntheticData();

            var stopWatch = System.Diagnostics.Stopwatch.StartNew();

            factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);

            Factorization factorization = factorizer.Factorize();

            stopWatch.Stop();
            long duration = stopWatch.ElapsedMilliseconds;

            // A hold-out test would be better, but this is just a toy example, so we only check
            // that the factorization is close to the original matrix.
            IRunningAverage avg = new FullRunningAverage();

            var userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating     = pref.GetValue();
                    var    userVector = factorization.getUserFeatures(userID);
                    var    itemVector = factorization.getItemFeatures(pref.GetItemID());
                    double estimate   = vectorDot(userVector, itemVector);
                    double err        = rating - estimate;

                    avg.AddDatum(err * err);
                }
            }

            // Regularization term: sum of squared norms of every user and item feature vector.
            double sum = 0.0;

            userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long userID     = userIDs.Current;
                var  userVector = factorization.getUserFeatures(userID);
                sum += vectorDot(userVector, userVector);
            }

            IEnumerator<long> itemIDs = dataModel.GetItemIDs();
            while (itemIDs.MoveNext())
            {
                long itemID = itemIDs.Current;
                // Item IDs must be looked up in the item feature matrix, not the user one.
                var itemVector = factorization.getItemFeatures(itemID);
                sum += vectorDot(itemVector, itemVector);
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            double loss = avg.GetAverage() / 2 + lambda / 2 * sum;

            logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration + "ms");
            Assert.True(rmse < 0.2);
        }
        /// <summary>
        /// Trains a <c>ParallelSGDFactorizer</c> on the synthetic data set and asserts that the
        /// root-mean-square error of the reconstructed preferences is below 0.2. The
        /// regularized loss and the training time are written to the log for information.
        /// </summary>
        public void testFactorizerWithWithSyntheticData()
        {
            // NOTE(review): presumably populates dataModel, rank, lambda and numIterations — confirm.
            setUpSyntheticData();

            var stopWatch = System.Diagnostics.Stopwatch.StartNew();

            factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);

            Factorization factorization = factorizer.Factorize();

            stopWatch.Stop();
            long duration = stopWatch.ElapsedMilliseconds;

            // A hold-out test would be better, but this is just a toy example, so we only check
            // that the factorization is close to the original matrix.
            IRunningAverage avg = new FullRunningAverage();

            // Accumulate the squared reconstruction error over every stored preference.
            var userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID))
                {
                    double rating = pref.GetValue();
                    var userVector = factorization.getUserFeatures(userID);
                    var itemVector = factorization.getItemFeatures(pref.GetItemID());
                    double estimate = vectorDot(userVector, itemVector);
                    double err = rating - estimate;

                    avg.AddDatum(err * err);
                }
            }

            // Accumulate the squared norms of all feature vectors for the regularization term.
            double sum = 0.0;

            userIDs = dataModel.GetUserIDs();
            while (userIDs.MoveNext())
            {
                long userID = userIDs.Current;
                var userVector = factorization.getUserFeatures(userID);
                double regularization = vectorDot(userVector, userVector);
                sum += regularization;
            }

            IEnumerator<long> itemIDs = dataModel.GetItemIDs();
            while (itemIDs.MoveNext())
            {
                long itemID = itemIDs.Current;
                // Use the item feature lookup here; the user lookup would read the wrong matrix.
                var itemVector = factorization.getItemFeatures(itemID);
                double regularization = vectorDot(itemVector, itemVector);
                sum += regularization;
            }

            double rmse = Math.Sqrt(avg.GetAverage());
            double loss = avg.GetAverage() / 2 + lambda / 2 * sum;
            logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration + "ms");
            Assert.True(rmse < 0.2);
        }
        /// <summary>
        /// Factorizes <paramref name="n"/>, first dividing out the values remembered in
        /// <c>_memory</c> and only delegating whatever remains to <c>Factorizer</c>. Factors
        /// returned by <c>Factorizer</c> are added to <c>_memory</c> for later calls.
        /// </summary>
        /// <param name="n">The number to factorize; must not be negative.</param>
        /// <returns>
        /// The collected factors, repeated according to multiplicity. Empty for 0 and 1.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="n"/> is negative.
        /// </exception>
        public override IEnumerable<long> Factorize(long n)
        {
            // Trivial cases

            if (n < 0)
            {
                throw new ArgumentOutOfRangeException("n", n, "The number to factorize must not be negative.");
            }

            if (n == 0 || n == 1)
            {
                return new long[] { };
            }

            if (_memory.Contains(n))
            {
                return new long[] { n };
            }

            // Remove factors already calculated

            var factors = new List<long>();

            foreach (var prime in _memory)
            {
                // Equivalent to prime * prime > n for positive values, but written as a division
                // so that a large remembered value cannot overflow long.
                // NOTE(review): this early exit assumes _memory enumerates in ascending order — confirm.
                if (n < _fullySeached && prime > n / prime)
                {
                    break;
                }

                while (n % prime == 0)
                {
                    factors.Add(prime);
                    n /= prime;

                    if (n == 1)
                    {
                        break;
                    }
                }

                // Nothing is left to divide, so scanning further remembered values is pointless.
                if (n == 1)
                {
                    break;
                }
            }

            if (n == 1)
            {
                return factors;
            }

            if (_memory.Contains(n))
            {
                factors.Add(n);
                return factors;
            }

            // Calculate new factors. Materialize the result once: it is consumed twice below,
            // and a lazily evaluated sequence would otherwise be computed twice.

            var newPrimes = Factorizer.Factorize(n).ToList();

            _memory.UnionWith(newPrimes);

            factors.AddRange(newPrimes);
            return factors;
        }