예제 #1
0
 /// <summary>Creates a small positive-only feedback fixture.</summary>
 /// <returns>a feedback object containing the (user, item) pairs (0, 0), (0, 1), (1, 0) and (1, 2)</returns>
 public static IPosOnlyFeedback CreatePosOnlyFeedback()
 {
     // one row per event: { user, item }
     int[,] user_item_pairs = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 2 } };

     var result = new PosOnlyFeedback<SparseBooleanMatrix>();
     for (int row = 0; row < user_item_pairs.GetLength(0); row++)
         result.Add(user_item_pairs[row, 0], user_item_pairs[row, 1]);

     return result;
 }
예제 #2
0
		/// <summary>Checks that Add() fills the user-wise and item-wise matrices and that Count reflects the number of added events.</summary>
		[Test()] public void TestAdd()
		{
			// one row per event: { user, item }
			int[,] events = { { 1, 4 }, { 1, 8 }, { 2, 4 }, { 2, 2 }, { 2, 5 }, { 3, 7 }, { 6, 3 }, { 8, 1 } };
			// (user, item) cells that must be set in the user-wise matrix
			int[,] stored = { { 2, 5 }, { 1, 4 }, { 6, 3 }, { 2, 2 } };
			// transposed cells that must NOT be set in the user-wise matrix
			int[,] mirrored = { { 5, 2 }, { 4, 1 }, { 3, 6 } };

			var data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < events.GetLength(0); row++)
				data.Add(events[row, 0], events[row, 1]);

			for (int row = 0; row < stored.GetLength(0); row++)
				Assert.IsTrue(data.UserMatrix[stored[row, 0], stored[row, 1]]);
			for (int row = 0; row < mirrored.GetLength(0); row++)
				Assert.IsFalse(data.UserMatrix[mirrored[row, 0], mirrored[row, 1]]);

			// the item-wise matrix is addressed as [item, user], so the index order is swapped
			for (int row = 0; row < stored.GetLength(0); row++)
				Assert.IsTrue(data.ItemMatrix[stored[row, 1], stored[row, 0]]);
			for (int row = 0; row < mirrored.GetLength(0); row++)
				Assert.IsFalse(data.ItemMatrix[mirrored[row, 1], mirrored[row, 0]]);

			Assert.AreEqual(8, data.Count);
		}
예제 #3
0
        /// <summary>Parse implicit feedback data line by line from a TextReader</summary>
        /// <remarks>
        /// Lines that are empty after trimming are skipped. Every other line is split at tab, space and comma;
        /// the first two columns are parsed as integer user and item IDs.
        /// </remarks>
        /// <param name="reader">the TextReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">thrown if a non-empty line has fewer than two columns</exception>
        public static IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            char[] separators = { '\t', ' ', ',' };
            var result = new PosOnlyFeedback<SparseBooleanMatrix>();

            for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
            {
                if (current.Trim().Length == 0)
                    continue;

                string[] columns = current.Split(separators);
                if (columns.Length < 2)
                    throw new IOException("Expected at least two columns: " + current);

                // arguments are evaluated left to right: the user ID is parsed and mapped before the item ID
                result.Add(
                    user_mapping.ToInternalID(int.Parse(columns[0])),
                    item_mapping.ToInternalID(int.Parse(columns[1])));
            }

            return result;
        }
예제 #4
0
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The evaluation protocol works as follows:
        /// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
        /// The sequence of users is random; note that <paramref name="test_users"/> is shuffled in place.
        /// Users without any test item among the candidate items are skipped, and their test items are not fed to the recommender.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        /// <exception cref="ArgumentException">thrown if the recommender is not an IIncrementalItemRecommender</exception>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList<int> test_users, IList<int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;
            if (incremental_recommender == null)
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

            // prepare candidate items once to avoid recreating them
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST:     candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }

            test_users.Shuffle();
            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();
            foreach (int user_id in test_users)
            {
                // test.ByUser[user_id] holds indices into test.Items, not item IDs,
                // so resolve them to item IDs before comparing against the candidate items
                var user_test_items = new List<int>();
                foreach (int index in test.ByUser[user_id])
                    user_test_items.Add(test.Items[index]);

                if (!candidate_items.Intersect(user_test_items).Any())
                    continue;

                // prepare data
                var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int item_id in user_test_items)
                    current_test_data.Add(user_id, item_id);

                // evaluate user
                var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
                results_by_user[user_id] = current_result;

                // update recommender
                var tuples = new List<Tuple<int, int>>();
                foreach (int item_id in user_test_items)
                    tuples.Add(Tuple.Create(user_id, item_id));
                incremental_recommender.AddFeedback(tuples);
            }

            var results = new ItemRecommendationEvaluationResults();

            // sum every measure over all evaluated users ...
            foreach (int u in results_by_user.Keys)
                foreach (string measure in Items.Measures)
                    results[measure] += results_by_user[u][measure];

            // ... and turn the sums into per-user averages
            foreach (string measure in Items.Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = results_by_user.Count;

            return results;
        }
예제 #5
0
		/// <summary>Prepares the fixture: training feedback, a trained MostPopular recommender, test feedback, and the user and candidate item lists.</summary>
		public void SetUp()
		{
			// one row per event: { user, item }
			int[,] training_events = { { 1, 1 }, { 1, 2 }, { 2, 2 }, { 2, 3 }, { 3, 1 }, { 3, 2 } };
			int[,] test_events     = { { 2, 3 }, { 2, 4 }, { 4, 4 } };

			training_data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < training_events.GetLength(0); row++)
				training_data.Add(training_events[row, 0], training_events[row, 1]);

			var most_popular = new MostPopular();
			most_popular.Feedback = training_data;
			recommender = most_popular;
			recommender.Train();

			test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < test_events.GetLength(0); row++)
				test_data.Add(test_events[row, 0], test_events[row, 1]);

			all_users = Enumerable.Range(1, 4).ToList();       // users 1..4
			candidate_items = Enumerable.Range(1, 5).ToList(); // items 1..5
		}
예제 #6
0
		/// <summary>Checks that MaxUserID and MaxItemID report the largest user and item ID passed to Add().</summary>
		[Test()] public void TestMaxUserIDMaxItemID()
		{
			// one row per event: { user, item }
			int[,] events = { { 1, 4 }, { 1, 8 }, { 2, 4 }, { 2, 2 }, { 2, 5 }, { 3, 7 }, { 6, 3 } };

			var data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < events.GetLength(0); row++)
				data.Add(events[row, 0], events[row, 1]);

			Assert.AreEqual(6, data.MaxUserID);
			Assert.AreEqual(8, data.MaxItemID);
		}
예제 #7
0
        /// <summary>Checks that GetItemMatrixCopy() returns the item-wise matrix and that changing the copy leaves the feedback object untouched.</summary>
        [Test()] public void TestGetItemMatrixCopy()
        {
            // one row per event: { user, item }
            int[,] events = { { 1, 4 }, { 1, 8 }, { 2, 4 }, { 2, 2 }, { 2, 5 }, { 3, 7 }, { 6, 3 }, { 8, 1 } };
            // item-wise cells addressed as { item, user }
            int[,] set_cells   = { { 5, 2 }, { 4, 1 }, { 3, 6 }, { 2, 2 } };
            int[,] unset_cells = { { 2, 5 }, { 1, 4 }, { 6, 3 } };

            var data = new PosOnlyFeedback<SparseBooleanMatrix>();
            for (int row = 0; row < events.GetLength(0); row++)
                data.Add(events[row, 0], events[row, 1]);

            var copy = data.GetItemMatrixCopy();

            // check whether we got the item matrix
            for (int row = 0; row < set_cells.GetLength(0); row++)
                Assert.IsTrue(copy[set_cells[row, 0], set_cells[row, 1]]);
            for (int row = 0; row < unset_cells.GetLength(0); row++)
                Assert.IsFalse(copy[unset_cells[row, 0], unset_cells[row, 1]]);

            // check de-coupling: clearing a cell in the copy must not affect the original
            copy[5, 2] = false;
            Assert.IsFalse(copy[5, 2]);

            for (int row = 0; row < set_cells.GetLength(0); row++)
                Assert.IsTrue(data.ItemMatrix[set_cells[row, 0], set_cells[row, 1]]);
            for (int row = 0; row < unset_cells.GetLength(0); row++)
                Assert.IsFalse(data.ItemMatrix[unset_cells[row, 0], unset_cells[row, 1]]);

            Assert.AreEqual(8, data.Count);
        }
예제 #8
0
        /// <summary>Build implicit feedback data from the rows of an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>Column 0 is read as the user ID and column 1 as the item ID; further columns are ignored.</remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="FormatException">thrown if the reader exposes fewer than 2 columns</exception>
        static public IPosOnlyFeedback Read(IDataReader reader, IMapping user_mapping, IMapping item_mapping)
        {
            if (reader.FieldCount < 2)
                throw new FormatException("Expected at least 2 columns.");

            Func<string> read_user = reader.GetStringGetter(0);
            Func<string> read_item = reader.GetStringGetter(1);

            var result = new PosOnlyFeedback<SparseBooleanMatrix>();
            while (reader.Read())
            {
                // arguments are evaluated left to right: user first, then item
                result.Add(user_mapping.ToInternalID(read_user()), item_mapping.ToInternalID(read_item()));
            }

            return result;
        }
예제 #9
0
    /// <summary>Trains an ItemKNN recommender on the training split of the given data and evaluates it on the validation and test splits.</summary>
    /// <remarks>Progress and the training time in milliseconds are written to the console.</remarks>
    /// <param name="data">passed unchanged to readDataMapped to load the timed ratings</param>
    private static void startItemKNN(string data)
    {
        MyMediaLite.Data.Mapping user_mapping = new MyMediaLite.Data.Mapping();
        MyMediaLite.Data.Mapping item_mapping = new MyMediaLite.Data.Mapping();
        ITimedRatings            all_data     = readDataMapped(data, ref user_mapping, ref item_mapping);

        removeUserThreshold(ref all_data);

        Console.WriteLine("Start iteration Test ItemKNN");

        ITimedRatings validation_data = new TimedRatings();    // 10%
        ITimedRatings test_data       = new TimedRatings();    // 20%
        ITimedRatings training_data   = new TimedRatings();    // 70%

        readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

        // ItemKNN consumes positive-only feedback, so every training rating becomes one (user, item) event
        IPosOnlyFeedback training_data_pos = new PosOnlyFeedback <SparseBooleanMatrix> ();
        for (int index = 0; index < training_data.Users.Count; index++)
        {
            training_data_pos.Add(training_data.Users [index], training_data.Items [index]);
        }

        MyMediaLite.ItemRecommendation.ItemKNN recommender = new MyMediaLite.ItemRecommendation.ItemKNN();
        recommender.Feedback = training_data_pos;

        // Stopwatch is monotonic; wall-clock differences can be skewed by clock adjustments
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        recommender.Train();
        stopwatch.Stop();

        Console.Write("Total Training time needed:");
        Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);
        Console.WriteLine("Final results in this iteration:");

        // the return values are not used here; the calls are kept for whatever EvaluateTime itself does
        MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
        MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
    }
예제 #10
0
        /// <summary>Re-indexes the users and items of the training data with consecutive zero-based IDs and trains the wrapped recommender on the result.</summary>
        /// <remarks>All ID lookup tables are rebuilt from scratch on every call.</remarks>
        /// <param name="trainingData">positive feedback per user; keys of Users are the real user IDs</param>
        public void Train(IBasicTrainingData <IPositiveFeedbackForUser> trainingData)
        {
            // start over with empty lookup tables and counters
            m_nextMediaLiteUserId         = 0;
            m_nextMediaLiteItemId         = 0;
            m_realUserIdToMediaLiteUserId = new Dictionary <int, int>();
            m_mediaLiteUserIdToRealUserId = new Dictionary <int, int>();
            m_realItemIdToMediaLiteItemId = new Dictionary <int, int>();
            m_mediaLiteItemIdToRealItemId = new Dictionary <int, int>();

            var converted = new PosOnlyFeedback <SparseBooleanMatrix>();

            foreach (KeyValuePair <int, IPositiveFeedbackForUser> entry in trainingData.Users)
            {
                int realUserId = entry.Key;
                IPositiveFeedbackForUser userFeedback = entry.Value;

                // every user entry gets the next free internal user ID
                int internalUserId = m_nextMediaLiteUserId++;
                m_realUserIdToMediaLiteUserId[realUserId]     = internalUserId;
                m_mediaLiteUserIdToRealUserId[internalUserId] = realUserId;

                foreach (int realItemId in userFeedback.Items)
                {
                    // items get an internal ID the first time they are seen
                    int internalItemId;
                    if (!m_realItemIdToMediaLiteItemId.TryGetValue(realItemId, out internalItemId))
                    {
                        internalItemId = m_nextMediaLiteItemId++;
                        m_realItemIdToMediaLiteItemId[realItemId]     = internalItemId;
                        m_mediaLiteItemIdToRealItemId[internalItemId] = realItemId;
                    }

                    converted.Add(internalUserId, internalItemId);
                }
            }

            m_recommender.Feedback = converted;
            m_recommender.Train();
        }
예제 #11
0
		/// <summary>Read in implicit feedback data from a TextReader</summary>
		/// <remarks>
		/// Lines that are empty after trimming are skipped. Every other line is split at Constants.SPLIT_CHARS;
		/// the first column is the user ID and the second column the item ID.
		/// </remarks>
		/// <param name="reader">the TextReader to be read from</param>
		/// <param name="user_mapping">user <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
		/// <param name="item_mapping">item <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
		/// <param name="ignore_first_line">if true, ignore the first line</param>
		/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
		/// <exception cref="FormatException">
		/// thrown if a line has fewer than 2 columns, or if mapping or storing its IDs fails
		/// (the original error is then available as the inner exception)
		/// </exception>
		static public IPosOnlyFeedback Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
		{
			if (user_mapping == null)
				user_mapping = new IdentityMapping();
			if (item_mapping == null)
				item_mapping = new IdentityMapping();
			if (ignore_first_line)
				reader.ReadLine();

			var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

			string line;
			while ((line = reader.ReadLine()) != null)
			{
				if (line.Trim().Length == 0)
					continue;

				string[] tokens = line.Split(Constants.SPLIT_CHARS);

				if (tokens.Length < 2)
					throw new FormatException("Expected at least 2 columns: " + line);

				try
				{
					int user_id = user_mapping.ToInternalID(tokens[0]);
					int item_id = item_mapping.ToInternalID(tokens[1]);
					feedback.Add(user_id, item_id);
				}
				catch (Exception e)
				{
					// keep the root cause attached so callers can still diagnose the failing line
					throw new FormatException(string.Format("Could not read line '{0}'", line), e);
				}
			}

			return feedback;
		}
예제 #12
0
		/// <summary>Checks that GetItemMatrixCopy() returns the item-wise matrix and that changing the copy leaves the feedback object untouched.</summary>
		[Test()] public void TestGetItemMatrixCopy()
		{
			// one row per event: { user, item }
			int[,] events = { { 1, 4 }, { 1, 8 }, { 2, 4 }, { 2, 2 }, { 2, 5 }, { 3, 7 }, { 6, 3 }, { 8, 1 } };
			// item-wise cells addressed as { item, user }
			int[,] set_cells   = { { 5, 2 }, { 4, 1 }, { 3, 6 }, { 2, 2 } };
			int[,] unset_cells = { { 2, 5 }, { 1, 4 }, { 6, 3 } };

			var data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < events.GetLength(0); row++)
				data.Add(events[row, 0], events[row, 1]);

			var copy = data.GetItemMatrixCopy();

			// check whether we got the item matrix
			for (int row = 0; row < set_cells.GetLength(0); row++)
				Assert.IsTrue(copy[set_cells[row, 0], set_cells[row, 1]]);
			for (int row = 0; row < unset_cells.GetLength(0); row++)
				Assert.IsFalse(copy[unset_cells[row, 0], unset_cells[row, 1]]);

			// check de-coupling: clearing a cell in the copy must not affect the original
			copy[5, 2] = false;
			Assert.IsFalse(copy[5, 2]);

			for (int row = 0; row < set_cells.GetLength(0); row++)
				Assert.IsTrue(data.ItemMatrix[set_cells[row, 0], set_cells[row, 1]]);
			for (int row = 0; row < unset_cells.GetLength(0); row++)
				Assert.IsFalse(data.ItemMatrix[unset_cells[row, 0], unset_cells[row, 1]]);

			Assert.AreEqual(8, data.Count);
		}
예제 #13
0
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The evaluation protocol works as follows:
        /// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
        /// The sequence of users is random; note that <paramref name="test_users"/> is shuffled in place.
        /// Users without any test item among the candidate items are skipped, and their test items are not fed to the recommender.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        /// <exception cref="ArgumentException">thrown if the recommender is not an IIncrementalItemRecommender</exception>
        static public ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList <int> test_users, IList <int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;

            if (incremental_recommender == null)
            {
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");
            }

            candidate_items = Items.Candidates(candidate_items, candidate_item_mode, test, training);

            test_users.Shuffle();
            var results_by_user = new Dictionary <int, ItemRecommendationEvaluationResults>();

            foreach (int user_id in test_users)
            {
                // test.ByUser[user_id] holds indices into test.Items, not item IDs,
                // so resolve them to item IDs before comparing against the candidate items
                var user_test_items = new List <int>();
                foreach (int index in test.ByUser[user_id])
                {
                    user_test_items.Add(test.Items[index]);
                }

                if (!candidate_items.Intersect(user_test_items).Any())
                {
                    continue;
                }

                // prepare data
                var current_test_data = new PosOnlyFeedback <SparseBooleanMatrix>();
                foreach (int item_id in user_test_items)
                {
                    current_test_data.Add(user_id, item_id);
                }
                // evaluate user
                var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
                results_by_user[user_id] = current_result;

                // update recommender
                var tuples = new List <Tuple <int, int> >();
                foreach (int item_id in user_test_items)
                {
                    tuples.Add(Tuple.Create(user_id, item_id));
                }
                incremental_recommender.AddFeedback(tuples);
                // TODO candidate_items should be updated properly
            }

            var results = new ItemRecommendationEvaluationResults();

            // sum every measure over all evaluated users ...
            foreach (int u in results_by_user.Keys)
            {
                foreach (string measure in Items.Measures)
                {
                    results[measure] += results_by_user[u][measure];
                }
            }

            // ... and turn the sums into per-user averages
            foreach (string measure in Items.Measures)
            {
                results[measure] /= results_by_user.Count;
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = results_by_user.Count;

            return(results);
        }
예제 #14
0
    /// <summary>Converts rating data into positive-only feedback by keeping the ratings that reach a threshold.</summary>
    /// <remarks>The number of kept events is reported on standard error.</remarks>
    /// <param name="ratings">the rating data to convert</param>
    /// <param name="threshold">minimum rating value (inclusive) for an event to count as positive feedback</param>
    /// <returns>the (user, item) pairs of all ratings greater than or equal to the threshold</returns>
    static IPosOnlyFeedback CreateFeedback(IRatings ratings, double threshold)
    {
        var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

        for (int index = 0; index < ratings.Count; index++)
            if (ratings[index] >= threshold)
                feedback.Add(ratings.Users[index], ratings.Items[index]);

        // the filter above is inclusive, so the message must say ">=" rather than ">"
        Console.Error.WriteLine("{0} ratings >= {1}", feedback.Count, threshold);

        return feedback;
    }
예제 #15
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// Every (user, item) event of the test data is visited once, in shuffled order.
        /// An event is evaluated on its own (as a one-event test set) if its user is in
        /// <paramref name="relevant_users"/> and its item is in <paramref name="relevant_items"/>.
        /// Every event, evaluated or not, is afterwards added to the recommender via AddFeedback.
        /// Measures are first averaged per user over that user's evaluated events, then averaged over users.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        static public Dictionary <string, double> EvaluateOnline(
            IItemRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback train,
            ICollection <int> relevant_users, ICollection <int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos   = 0;

            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
            {
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }
            }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];

            for (int index = 0; index < random_index.Length; index++)
            {
                random_index[index] = index;
            }
            Util.Utils.Shuffle <int>(random_index);

            // per-user accumulator: sum of each measure plus "num_items" = number of evaluated events
            var results_by_user = new Dictionary <int, Dictionary <string, double> >();

            foreach (int index in random_index)
            {
                if (relevant_users.Contains(users[index]) && relevant_items.Contains(items[index]))
                {
                    // evaluate user
                    var current_test = new PosOnlyFeedback <SparseBooleanMatrix>();
                    current_test.Add(users[index], items[index]);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    // only count the event if Evaluate reported exactly one evaluated user
                    if (current_result["num_users"] == 1)
                    {
                        if (results_by_user.ContainsKey(users[index]))
                        {
                            // user seen before: add this event's measures to the running sums
                            foreach (string measure in Measures)
                            {
                                results_by_user[users[index]][measure] += current_result[measure];
                            }
                            results_by_user[users[index]]["num_items"]++;
                        }
                        else
                        {
                            // first event of this user: reuse the result dictionary as the accumulator
                            results_by_user[users[index]] = current_result;
                            results_by_user[users[index]]["num_items"] = 1;
                            results_by_user[users[index]].Remove("num_users");
                        }
                    }
                }

                // update recommender
                // (this runs for every test event, including those skipped by the relevance check above)
                recommender.AddFeedback(users[index], items[index]);
            }

            var results = new Dictionary <string, double>();

            foreach (string measure in Measures)
            {
                results[measure] = 0;
            }

            // per-user average (sum / number of evaluated events), summed over all users
            foreach (int u in results_by_user.Keys)
            {
                foreach (string measure in Measures)
                {
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];
                }
            }

            // average over users
            // NOTE(review): if no event was evaluated, results_by_user.Count is 0 and the double division yields NaN
            foreach (string measure in Measures)
            {
                results[measure] /= results_by_user.Count;
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = test.Count;             // FIXME this is not exact

            return(results);
        }
		/// <summary>Parse rating data from a TextReader and keep the sufficiently high ratings as implicit feedback</summary>
		/// <remarks>
		/// Lines that are empty after trimming are skipped. Every other line is split at Constants.SPLIT_CHARS into
		/// user ID, item ID and rating; the rating is parsed with the invariant culture.
		/// </remarks>
		/// <param name="reader">the TextReader to be read from</param>
		/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
		/// <param name="user_mapping">user <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
		/// <param name="item_mapping">item <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
		/// <param name="ignore_first_line">if true, ignore the first line</param>
		/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
		/// <exception cref="FormatException">thrown if a non-empty line has fewer than 3 columns</exception>
		static public IPosOnlyFeedback Read(TextReader reader, float rating_threshold, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
		{
			user_mapping = user_mapping ?? new IdentityMapping();
			item_mapping = item_mapping ?? new IdentityMapping();

			// optionally skip a header line
			if (ignore_first_line)
				reader.ReadLine();

			var result = new PosOnlyFeedback<SparseBooleanMatrix>();

			for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
			{
				if (current.Trim().Length == 0)
					continue;

				string[] columns = current.Split(Constants.SPLIT_CHARS);
				if (columns.Length < 3)
					throw new FormatException("Expected at least 3 columns: " + current);

				// both IDs are mapped before the rating is parsed, also for ratings below the threshold
				int user_id = user_mapping.ToInternalID(columns[0]);
				int item_id = item_mapping.ToInternalID(columns[1]);

				if (float.Parse(columns[2], CultureInfo.InvariantCulture) >= rating_threshold)
					result.Add(user_id, item_id);
			}

			return result;
		}
예제 #17
0
        /// <summary>Build implicit feedback data from the rows of an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>Column 0 is read as the 32-bit integer user ID and column 1 as the 32-bit integer item ID.</remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">thrown if the reader exposes fewer than two columns</exception>
        public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var result = new PosOnlyFeedback<SparseBooleanMatrix>();

            if (reader.FieldCount < 2)
            {
                throw new IOException("Expected at least two columns.");
            }

            while (reader.Read())
            {
                // arguments are evaluated left to right: the user column is read and mapped before the item column
                result.Add(
                    user_mapping.ToInternalID(reader.GetInt32(0)),
                    item_mapping.ToInternalID(reader.GetInt32(1)));
            }

            return result;
        }
예제 #18
0
    static void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            // training data
            training_file = Path.Combine(data_dir, training_file);
            training_data = double.IsNaN(rating_threshold)
                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // user attributes
            if (user_attributes_file != null)
                user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            if (recommender is IUserAttributeAwareRecommender)
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

            // item attributes
            if (item_attributes_file != null)
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            if (recommender is IItemAttributeAwareRecommender)
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
            }

            // user groups
            if (user_groups_file != null)
            {
                group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
                user_groups = group_to_user.NonEmptyRowIDs;
                Console.WriteLine("{0} user groups", user_groups.Count);
            }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_file = Path.Combine(data_dir, test_file);
                    test_data = double.IsNaN(rating_threshold)
                        ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                        : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (group_method == "GroupsAsUsers")
            {
                Console.WriteLine("group recommendation strategy: {0}", group_method);
                // TODO verify what is going on here

                //var training_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                // transform groups to users
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
                            training_data.Add(group_id, item_id);
                // add the users that do not belong to groups

                //training_data = training_data_group;

                // transform groups to users
                var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
                            test_data_group.Add(group_id, item_id);

                test_data = test_data_group;

                group_method = null; // deactivate s.t. the normal eval routines are used
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf = test_users_file;
                var rif = candidate_items_file;
                test_users_file = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um = user_mapping;
                var im = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training and test data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                    test_data = test_data.Transpose();
            }

            if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
                ((ItemRecommender)recommender).Feedback = training_data;

            // test users
            if (test_users_file != null)
                test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
            else
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

            // if necessary, perform user sampling
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet<int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count - 1);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
                candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
            else if (all_items)
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

            if (candidate_items != null)
                eval_item_mode = CandidateItems.EXPLICIT;
            else if (in_training_items)
                eval_item_mode = CandidateItems.TRAINING;
            else if (in_test_items)
                eval_item_mode = CandidateItems.TEST;
            else if (overlap_items)
                eval_item_mode = CandidateItems.OVERLAP;
            else
                eval_item_mode = CandidateItems.UNION;
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
예제 #19
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing); repeated events
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The test events are visited in shuffled order. An event whose user is in <paramref name="test_users"/>
        /// and whose item is among the candidate items is evaluated on its own, as a one-event test set.
        /// After that, every event (evaluated or not) is handed to the recommender via AddFeedback.
        /// The measures are first averaged per user and then averaged over all users that have results.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>
        /// the evaluation results (averaged by user), plus the entries "num_users" (users with results),
        /// "num_items" (number of candidate items), and "num_lists" (number of evaluated single-event lists)
        /// </returns>
        /// <exception cref="ArgumentException">if recommender is not an IIncrementalItemRecommender</exception>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList<int> test_users, IList<int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;
            if (incremental_recommender == null)
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

            // prepare candidate items once to avoid recreating them
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST:     candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }
            // from here on the candidate items are always passed explicitly
            candidate_item_mode = CandidateItems.EXPLICIT;

            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            random_index.Shuffle();

            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();

            int num_lists = 0;

            foreach (int index in random_index)
            {
                int current_user = users[index];
                int current_item = items[index];

                if (test_users.Contains(current_user) && candidate_items.Contains(current_item))
                {
                    // evaluate the single event as its own one-user test set
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(current_user, current_item);
                    var current_result = Items.Evaluate(recommender, current_test, training, current_test.AllUsers, candidate_items, candidate_item_mode);

                    if (current_result["num_users"] == 1)
                    {
                        // count every evaluated list, including the first one of each user
                        num_lists++;

                        ItemRecommendationEvaluationResults user_result;
                        if (results_by_user.TryGetValue(current_user, out user_result))
                        {
                            // accumulate; the per-user average is taken after the loop
                            foreach (string measure in Items.Measures)
                                user_result[measure] += current_result[measure];
                            user_result["num_items"]++;
                        }
                        else
                        {
                            // first result for this user: reuse the result object as the accumulator
                            current_result["num_items"] = 1;
                            current_result.Remove("num_users");
                            results_by_user[current_user] = current_result;
                        }
                    }
                }

                // update recommender
                incremental_recommender.AddFeedback(current_user, current_item);
            }

            var results = new ItemRecommendationEvaluationResults();

            // macro-averaging: per-user mean first ...
            foreach (int u in results_by_user.Keys)
                foreach (string measure in Items.Measures)
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

            // ... then the mean over all users with results
            foreach (string measure in Items.Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = num_lists;

            return results;
        }
예제 #20
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The test events are visited in shuffled order. An event whose user is in <paramref name="relevant_users"/>
        /// and whose item is in <paramref name="relevant_items"/> is evaluated on its own, as a one-event test set.
        /// After that, every event (evaluated or not) is handed to the recommender via AddFeedback.
        /// The measures are first averaged per user and then averaged over all users that have results.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>
        /// a dictionary containing the evaluation results (averaged by user), plus the entries "num_users"
        /// (users with results), "num_items" (number of relevant items), and "num_lists" (number of evaluated single-event lists)
        /// </returns>
        public static Dictionary<string, double> EvaluateOnline(
            IItemRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback train,
            ICollection<int> relevant_users, ICollection<int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            Util.Utils.Shuffle<int>(random_index);

            var results_by_user = new Dictionary<int, Dictionary<string, double>>();

            // number of single-event lists that actually contributed to the results
            int num_lists = 0;

            foreach (int index in random_index)
            {
                int current_user = users[index];
                int current_item = items[index];

                if (relevant_users.Contains(current_user) && relevant_items.Contains(current_item))
                {
                    // evaluate the single event as its own one-user test set
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(current_user, current_item);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    if (current_result["num_users"] == 1)
                    {
                        num_lists++;

                        Dictionary<string, double> user_result;
                        if (results_by_user.TryGetValue(current_user, out user_result))
                        {
                            // accumulate; the per-user average is taken after the loop
                            foreach (string measure in Measures)
                                user_result[measure] += current_result[measure];
                            user_result["num_items"]++;
                        }
                        else
                        {
                            // first result for this user: reuse the result dictionary as the accumulator
                            current_result["num_items"] = 1;
                            current_result.Remove("num_users");
                            results_by_user[current_user] = current_result;
                        }
                    }
                }

                // update recommender
                recommender.AddFeedback(current_user, current_item);
            }

            var results = new Dictionary<string, double>();
            foreach (string measure in Measures)
                results[measure] = 0;

            // macro-averaging: per-user mean first ...
            foreach (int u in results_by_user.Keys)
                foreach (string measure in Measures)
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

            // ... then the mean over all users with results
            foreach (string measure in Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = num_lists;

            return results;
        }
예제 #21
0
        /// <summary>Build positive-only feedback from the rows of an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>Column 0 is read as the user ID and column 1 as the item ID; further columns are ignored.</remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="FormatException">if the reader exposes fewer than 2 columns</exception>
        public static IPosOnlyFeedback Read(IDataReader reader, IMapping user_mapping, IMapping item_mapping)
        {
            if (reader.FieldCount < 2)
                throw new FormatException("Expected at least 2 columns.");

            // column accessors are created once and reused for every row
            Func<string> read_user = reader.GetStringGetter(0);
            Func<string> read_item = reader.GetStringGetter(1);

            var result = new PosOnlyFeedback<SparseBooleanMatrix>();
            while (reader.Read())
                result.Add(user_mapping.ToInternalID(read_user()), item_mapping.ToInternalID(read_item()));

            return result;
        }
예제 #22
0
		/// <summary>RemoveItem must drop all events of the given item and reduce Count accordingly</summary>
		[Test()] public void TestRemoveItem()
		{
			// (user, item) events; item 4 occurs in three of the seven events
			var events = new int[,] { {1, 4}, {1, 8}, {2, 4}, {2, 2}, {2, 5}, {3, 4}, {3, 3} };

			var data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < events.GetLength(0); row++)
				data.Add(events[row, 0], events[row, 1]);

			// state before the removal
			Assert.AreEqual(7, data.Count);
			Assert.IsTrue(data.UserMatrix[2, 4]);

			data.RemoveItem(4);

			// state after the removal
			Assert.IsFalse(data.UserMatrix[2, 4]);
			Assert.AreEqual(4, data.Count);
		}
예제 #23
0
        /// <summary>Build positive-only feedback from rating rows of an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Column 0 is read as the user ID, column 1 as the item ID, and column 2 as the rating.
        /// Only rows whose rating is greater than or equal to the threshold become feedback events.
        /// </remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="FormatException">if the reader exposes fewer than 3 columns</exception>
        public static IPosOnlyFeedback Read(IDataReader reader, float rating_threshold, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            if (reader.FieldCount < 3)
                throw new FormatException("Expected at least 3 columns.");

            // column accessors are created once and reused for every row
            Func<string> read_user   = reader.GetStringGetter(0);
            Func<string> read_item   = reader.GetStringGetter(1);
            Func<float>  read_rating = reader.GetFloatGetter(2);

            var result = new PosOnlyFeedback<SparseBooleanMatrix>();
            while (reader.Read())
            {
                // both IDs are mapped for every row, also for rows that end up below the threshold
                int internal_user = user_mapping.ToInternalID(read_user());
                int internal_item = item_mapping.ToInternalID(read_item());

                bool is_positive = read_rating() >= rating_threshold;
                if (is_positive)
                    result.Add(internal_user, internal_item);
            }

            return result;
        }
예제 #24
0
		/// <summary>AllItems must contain each item that occurs in the feedback exactly once</summary>
		[Test()] public void TestAllItems()
		{
			// (user, item) events; the distinct items are 2, 3, 4, 5, 7, and 8
			var events = new int[,] { {1, 4}, {1, 8}, {2, 4}, {2, 2}, {2, 5}, {3, 7}, {3, 3}, {6, 3} };

			var data = new PosOnlyFeedback<SparseBooleanMatrix>();
			for (int row = 0; row < events.GetLength(0); row++)
				data.Add(events[row, 0], events[row, 1]);

			Assert.AreEqual(6, data.AllItems.Count);
		}