Sparse representation of a boolean matrix, using HashSets.
Fast row-wise access is possible. Indexes are zero-based. If you need a more memory-efficient data structure, try SparseBooleanMatrixBinarySearch or SparseBooleanMatrixStatic.
Inheritance: IBooleanMatrix
示例#1
0
        /// <summary>Read binary relation data from a StreamReader</summary>
        /// <remarks>
        /// Every non-empty line names one pair for which the relation holds:
        /// ENTITY_ID whitespace ENTITY_ID
        /// where whitespace is exactly one tab or space character.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the relation data</returns>
        /// <exception cref="IOException">a non-empty line does not split into exactly two columns</exception>
        public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
        {
            var separators = new[] { '\t', ' ' };
            var relation = new SparseBooleanMatrix();

            string current_line;
            while ((current_line = reader.ReadLine()) != null)
            {
                // blank lines carry no data
                if (current_line.Length == 0)
                    continue;

                string[] columns = current_line.Split(separators);
                if (columns.Length != 2)
                    throw new IOException("Expected exactly two columns: " + current_line);

                // both columns are external IDs of the same entity type
                int first_id  = mapping.ToInternalID(int.Parse(columns[0]));
                int second_id = mapping.ToInternalID(int.Parse(columns[1]));

                relation[first_id, second_id] = true;
            }

            return relation;
        }
		/// <summary>CreateMatrix must return an object of the same type as the matrix it is called on</summary>
		[Test()] public void TestCreateMatrix()
		{
			var source = new SparseBooleanMatrix();
			var created = source.CreateMatrix(2, 2);

			Assert.IsInstanceOf(source.GetType(), created);
		}
示例#3
0
        /// <summary>BinaryCosine.Create must produce a symmetric matrix with the expected values for a small example</summary>
        public void TestCreate()
        {
            // (row, column) positions of the true entries; row 2 is left empty
            int[][] true_entries = {
                new[] { 0, 1 }, new[] { 0, 4 },
                new[] { 1, 0 }, new[] { 1, 2 }, new[] { 1, 4 },
                new[] { 3, 1 }, new[] { 3, 3 }, new[] { 3, 4 },
            };
            var input = new SparseBooleanMatrix();
            foreach (int[] entry in true_entries)
                input[entry[0], entry[1]] = true;

            var similarities = BinaryCosine.Create(input);

            Assert.AreEqual(4, similarities.NumberOfRows);
            Assert.IsTrue(similarities.IsSymmetric);

            // rows 0 and 1 hold 2 and 3 entries and share column 4
            double expected_0_1 = 1 / Math.Sqrt(6);
            Assert.AreEqual(expected_0_1, similarities[0, 1], delta);
            Assert.AreEqual(expected_0_1, similarities[1, 0], delta);
            // rows 1 and 3 hold 3 entries each and share column 4
            Assert.AreEqual(1 / 3d, similarities[1, 3], delta);

            // the empty row 2: value 1 against itself, 0 against every other row
            for (int other = 0; other < 4; other++)
                Assert.AreEqual(other == 2 ? 1f : 0f, similarities[2, other]);

            foreach (int row in new[] { 0, 1, 3 })
                Assert.AreEqual(0f, similarities[row, 2]);
        }
 /// <summary>NumberOfColumns must be 5 when the highest column index that was set is 4</summary>
 public void TestNumberOfColumns()
 {
     var matrix = new SparseBooleanMatrix();

     int row = 0;
     while (row < 5)
     {
         matrix[row, 1] = true;
         matrix[row, 4] = true;
         row++;
     }

     Assert.AreEqual(5, matrix.NumberOfColumns);
 }
示例#5
0
        /// <summary>Prepare the filtering data structures, then run the base class training</summary>
        /// <remarks>
        /// Allocates one slot per user ID in 0..MaxUserID, stores the transposed item attributes,
        /// and fills every slot with the result of ItemsFiltered.GetFilteredItems for that user.
        /// </remarks>
        public override void Train()
        {
            filtered_items_by_user = new Dictionary<int, ICollection<int>>[MaxUserID + 1];
            items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

            Console.Error.WriteLine("max_user_id {0} max_item_id {1}", MaxUserID, MaxItemID);

            int user_id = 0;
            while (user_id < filtered_items_by_user.Count)
            {
                filtered_items_by_user[user_id] = ItemsFiltered.GetFilteredItems(user_id, Feedback, ItemAttributes);
                user_id++;
            }

            base.Train();
        }
        /// <summary>Build the transpose of this matrix, i.e. a matrix with rows and columns swapped</summary>
        /// <returns>a new matrix holding the transposed entries; this matrix is not modified</returns>
        public IMatrix <bool> Transpose()
        {
            var result = new SparseBooleanMatrix();

            // every true entry (row, column) becomes (column, row) in the result
            for (int row = 0; row < row_list.Count; row++)
                foreach (int column in this[row])
                    result[column, row] = true;

            return result;
        }
        /// <summary>NonEmptyRows must report four rows when rows 0, 1, 3 and 4 have entries and row 2 has none</summary>
        public void TestNonEmptyRows()
        {
            var matrix = new SparseBooleanMatrix();
            for (int row = 0; row < 5; row++)
            {
                // row 2 is deliberately left empty
                if (row == 2)
                    continue;

                matrix[row, 1] = true;
                matrix[row, 4] = true;
            }

            Assert.IsTrue(matrix[0, 1]);

            IList<KeyValuePair<int, HashSet<int>>> non_empty = matrix.NonEmptyRows;
            Assert.AreEqual(4, non_empty.Count);

            // TODO test contents
        }
示例#8
0
        /// <summary>Display dataset statistics</summary>
        /// <remarks>
        /// Sparsity is reported as the percentage of user-item cells without a rating:
        /// 100 * (users * items - ratings) / (users * items).
        /// For a dataset without users or without items the denominator is zero,
        /// so the reported sparsity is not a finite number.
        /// All parts of the result are formatted with the invariant culture.
        /// </remarks>
        /// <param name="train">the training data</param>
        /// <param name="test">the test data</param>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        /// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
        /// <returns>the statistics text, followed by the result of Statistics(user_attributes, item_attributes)</returns>
        public static string Statistics(
			this IRatings train, IRatings test = null,
			SparseBooleanMatrix user_attributes = null, SparseBooleanMatrix item_attributes = null,
			bool display_overlap = false)
        {
            // training data stats
            int num_users = train.AllUsers.Count;
            int num_items = train.AllItems.Count;
            long matrix_size = (long) num_users * num_items; // long: users * items can exceed the int range
            long empty_size  = matrix_size - train.Count;
            double sparsity = (double) 100L * empty_size / matrix_size;
            string s = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, train.Count, sparsity);

            // one cast instead of a type check followed by a cast
            var time_train = train as ITimedRatings;
            if (time_train != null)
                s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_train.EarliestTime, time_train.LatestTime);

            // test data stats
            if (test != null)
            {
                num_users = test.AllUsers.Count;
                num_items = test.AllItems.Count;
                matrix_size = (long) num_users * num_items;
                empty_size  = matrix_size - test.Count; // TODO depends on the eval scheme whether this is correct
                sparsity = (double) 100L * empty_size / matrix_size;
                s += string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, test.Count, sparsity);

                var time_test = test as ITimedRatings;
                if (time_test != null)
                    s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_test.EarliestTime, time_test.LatestTime);
            }

            // count and display the overlap between train and test
            if (display_overlap && test != null)
            {
                int num_new_users = 0;
                int num_new_items = 0;
                // the value is a TimeSpan, printed in its default format
                TimeSpan seconds = Util.Wrap.MeasureTime(delegate() {
                    num_new_users = test.AllUsers.Except(train.AllUsers).Count();
                    num_new_items = test.AllItems.Except(train.AllItems).Count();
                });
                // same culture-invariant formatting as every other line of the report
                s += string.Format(CultureInfo.InvariantCulture, "{0} new users, {1} new items ({2} seconds)\n", num_new_users, num_new_items, seconds);
            }

            return s + Statistics(user_attributes, item_attributes);
        }
		/// <summary>GetEntriesByRow must return two entries for the filled rows and none for the empty ones</summary>
		[Test()] public void TestGetEntriesByRow()
		{
			var matrix = new SparseBooleanMatrix();
			for (int row = 0; row < 5; row++)
			{
				// rows 2 and 3 stay empty
				if (row == 2 || row == 3)
					continue;

				matrix[row, 1] = true;
				matrix[row, 4] = true;
			}

			// expected number of entries, indexed by row
			int[] expected_counts = { 2, 2, 0, 0, 2 };
			for (int row = 0; row < expected_counts.Length; row++)
				Assert.AreEqual(expected_counts[row], matrix.GetEntriesByRow(row).Count);
		}
		/// <summary>IsSymmetric must follow the matrix contents as entries are set and cleared</summary>
		[Test()] public void TestIsSymmetric()
		{
			var relation = new SparseBooleanMatrix();

			// an empty matrix counts as symmetric
			Assert.IsTrue(relation.IsSymmetric);

			// a diagonal entry keeps it symmetric
			relation[1, 1] = true;
			Assert.IsTrue(relation.IsSymmetric);

			// (2, 1) without its mirror (1, 2) breaks symmetry
			relation[2, 1] = true;
			Assert.IsFalse(relation.IsSymmetric);

			// adding the mirror entry restores it
			relation[1, 2] = true;
			Assert.IsTrue(relation.IsSymmetric);

			// clearing one half of the pair breaks it again
			relation[2, 1] = false;
			Assert.IsFalse(relation.IsSymmetric);
		}
示例#11
0
 /// <summary>BinaryCosine.Create must produce the expected values, compared after rounding to four decimals</summary>
 public void TestCreate()
 {
     // (row, column) positions of the true entries
     int[][] true_entries = {
         new[] { 0, 1 }, new[] { 0, 4 },
         new[] { 1, 0 }, new[] { 1, 2 }, new[] { 1, 4 },
         new[] { 3, 1 }, new[] { 3, 3 }, new[] { 3, 4 },
     };
     var input = new SparseBooleanMatrix();
     foreach (int[] entry in true_entries)
         input[entry[0], entry[1]] = true;

     var similarities = BinaryCosine.Create(input);

     double expected_0_1 = Math.Round(1 / Math.Sqrt(6), 4);
     double expected_1_3 = Math.Round(1 / 3d, 4);

     Assert.AreEqual(expected_0_1, Math.Round(similarities[0, 1], 4));
     Assert.AreEqual(expected_0_1, Math.Round(similarities[1, 0], 4));
     Assert.AreEqual(expected_1_3, Math.Round(similarities[1, 3], 4));
 }
示例#12
0
        /// <summary>ComputeCorrelations on a BinaryCosine of size 4 must produce the expected values for a small example</summary>
        public void TestComputeCorrelations()
        {
            // (row, column) positions of the true entries
            int[][] true_entries = {
                new[] { 0, 1 }, new[] { 0, 4 },
                new[] { 1, 0 }, new[] { 1, 2 }, new[] { 1, 4 },
                new[] { 3, 1 }, new[] { 3, 3 }, new[] { 3, 4 },
            };
            var input = new SparseBooleanMatrix();
            foreach (int[] entry in true_entries)
                input[entry[0], entry[1]] = true;

            var similarities = new BinaryCosine(4);
            similarities.ComputeCorrelations(input);

            double expected_0_1 = 1 / Math.Sqrt(6);
            Assert.AreEqual(expected_0_1, similarities[0, 1], delta);
            Assert.AreEqual(expected_0_1, similarities[1, 0], delta);
            Assert.AreEqual(1 / 3d, similarities[1, 3], delta);
        }
 /// <summary>NonEmptyRowIDs must list exactly the rows that contain at least one true entry</summary>
 /// <remarks>
 /// Rows 0, 1 and 4 are filled, rows 2 and 3 stay empty. Like the column ID test,
 /// this test expects the IDs to be enumerated in ascending order.
 /// </remarks>
 public void TestNonEmptyRowIDs()
 {
     var matrix = new SparseBooleanMatrix();
     for (int i = 0; i < 5; i++)
         if (i != 2 && i != 3)
         {
             matrix[i, 1] = true;
             matrix[i, 4] = true;
         }

     ICollection<int> rowIDs = matrix.NonEmptyRowIDs;
     IEnumerator<int> rowIDsEnum = rowIDs.GetEnumerator();

     // exactly one step per non-empty row; checking the MoveNext() result makes sure
     // Current is only read while the enumerator points at a real element
     Assert.IsTrue(rowIDsEnum.MoveNext());
     Assert.AreEqual(0, rowIDsEnum.Current);
     Assert.IsTrue(rowIDsEnum.MoveNext());
     Assert.AreEqual(1, rowIDsEnum.Current);
     Assert.IsTrue(rowIDsEnum.MoveNext());
     Assert.AreEqual(4, rowIDsEnum.Current);

     // no further IDs after the third one
     Assert.IsFalse(rowIDsEnum.MoveNext());
 }
示例#14
0
        /// <summary>Read binary attribute data from a StreamReader</summary>
        /// <remarks>
        /// Every non-empty line names one (entity, attribute) pair that holds:
        /// ENTITY_ID separator ATTRIBUTE_ID
        /// Lines are split on Constants.SPLIT_CHARS (tab/space/comma according to the original documentation).
        /// The entity ID is passed through the mapping, the attribute ID is used as the parsed integer.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the attribute data</returns>
        /// <exception cref="FormatException">a non-empty line does not split into exactly two columns</exception>
        public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
        {
            var attributes = new SparseBooleanMatrix();

            for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
            {
                // blank lines carry no data
                if (current.Length == 0)
                    continue;

                string[] columns = current.Split(Constants.SPLIT_CHARS);
                if (columns.Length != 2)
                    throw new FormatException("Expected exactly 2 columns: " + current);

                int row    = mapping.ToInternalID(columns[0]);
                int column = int.Parse(columns[1]);

                attributes[row, column] = true;
            }

            return attributes;
        }
示例#15
0
        /// <summary>Evaluation for rankings of filtered items</summary>
        /// <remarks>
        /// For every relevant user and every attribute returned by GetFilteredItems for that user,
        /// one ranked list over the relevant items carrying that attribute is evaluated.
        /// Lists without correct items, or where every candidate item is correct, are skipped.
        /// The reported measures are averages over all evaluated lists; if no list is evaluated,
        /// the averages are the result of dividing zero by zero (NaN).
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="item_attributes">the item attributes to be used for filtering</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        /// <exception cref="Exception">the recommender did not rank all candidate items of a list</exception>
        public static Dictionary<string, double> Evaluate(
			IItemRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
		    SparseBooleanMatrix item_attributes,
		    ICollection<int> relevant_users,
			ICollection<int> relevant_items)
        {
            if (train.Overlap(test) > 0)
                Console.Error.WriteLine("WARNING: Overlapping train and test data");

            SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

            // compute evaluation measures
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;

            // for counting the users and the evaluation lists
            int num_lists = 0;
            int num_users = 0;
            int last_user_id = -1;

            foreach (int user_id in relevant_users)
            {
                var filtered_items = GetFilteredItems(user_id, test, item_attributes);

                foreach (int attribute_id in filtered_items.Keys)
                {
                    // TODO optimize this a bit, currently it is quite naive
                    var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
                    relevant_filtered_items.IntersectWith(relevant_items);

                    var correct_items = new HashSet<int>(filtered_items[attribute_id]);
                    correct_items.IntersectWith(relevant_filtered_items);

                    // the user's training items, read once and reused for all measures below
                    var train_items = train.UserMatrix[user_id];

                    // the number of items that are really relevant for this user
                    var relevant_items_in_train = new HashSet<int>(train_items);
                    relevant_items_in_train.IntersectWith(relevant_filtered_items);
                    int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count;

                    // skip all users that have 0 or #relevant_filtered_items test items
                    if (correct_items.Count == 0)
                        continue;
                    if (num_eval_items - correct_items.Count == 0)
                        continue;

                    // counting stats
                    num_lists++;
                    if (last_user_id != user_id)
                    {
                        last_user_id = user_id;
                        num_users++;
                    }

                    // evaluation
                    int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

                    // reject an incomplete ranking before it contributes to any of the sums
                    if (prediction.Length != relevant_filtered_items.Count)
                        throw new Exception("Not all items have been ranked.");

                    auc_sum     += Items.AUC(prediction, correct_items, train_items);
                    map_sum     += Items.MAP(prediction, correct_items, train_items);
                    ndcg_sum    += Items.NDCG(prediction, correct_items, train_items);
                    prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train_items,  5);
                    prec_10_sum += Items.PrecisionAt(prediction, correct_items, train_items, 10);
                    prec_15_sum += Items.PrecisionAt(prediction, correct_items, train_items, 15);

                    // progress output: a dot every 1000 lists, a line break every 20000
                    if (num_lists % 1000 == 0)
                        Console.Error.Write(".");
                    if (num_lists % 20000 == 0)
                        Console.Error.WriteLine();
                }
            }

            var result = new Dictionary<string, double>();
            result.Add("AUC",     auc_sum / num_lists);
            result.Add("MAP",     map_sum / num_lists);
            result.Add("NDCG",    ndcg_sum / num_lists);
            result.Add("prec@5",  prec_5_sum / num_lists);
            result.Add("prec@10", prec_10_sum / num_lists);
            result.Add("prec@15", prec_15_sum / num_lists);
            result.Add("num_users", num_users);
            result.Add("num_lists", num_lists);
            result.Add("num_items", relevant_items.Count);

            return result;
        }
示例#16
0
 /// <summary>Create the transpose of this matrix, i.e. a matrix in which rows and columns are interchanged</summary>
 /// <returns>a newly created matrix with the transposed entries (copy)</returns>
 public IMatrix<bool> Transpose()
 {
     var flipped = new SparseBooleanMatrix();

     for (int row_id = 0; row_id < row_list.Count; row_id++)
     {
         // mirror every entry of this row across the diagonal
         foreach (int col_id in this[row_id])
         {
             flipped[col_id, row_id] = true;
         }
     }

     return flipped;
 }
示例#17
0
    /// <summary>Load training data, relevant users/items, attribute and relation data, and test data</summary>
    /// <remarks>
    /// All file names are resolved relative to data_dir. Missing attribute or relation files
    /// that the recommender (or the filtered evaluation) needs are reported through Usage().
    /// If test_ratio is non-zero, the test data is split off the training data instead of being read from a file.
    /// The time needed for loading is written to standard error.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items: read from file if given, otherwise everything in the training data
            if (relevant_users_file != null)
                relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            else
                relevant_users = training_data.AllUsers;
            if (relevant_items_file != null)
                relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            else
                relevant_items = training_data.AllItems;

            if (! (recommender is MyMediaLite.ItemRecommendation.Random))
                ((ItemRecommender)recommender).Feedback = training_data;

            // user attributes
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                    Usage("Recommender expects --user-attributes=FILE.");
                else
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                    Usage("Recommender expects --item-attributes=FILE.");
                else
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                else
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
                }
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                // check the item relation file here: testing the user relation file instead would
                // pass a null file name to Path.Combine, or reject a valid configuration
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
                }
            }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
            }
            else
            {
                // split the test data off the training data
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
        /// <summary>Overlap must count the entries that are set in both matrices</summary>
        public void TestOverlapCount()
        {
            // rows 2, 4 and 5 of the static matrix hold columns {2, 5}, {3, 6} and {1, 5}
            var matrix = new SparseBooleanMatrixStatic();
            matrix[2] = new int[] { 2, 5 };
            matrix[4] = new int[] { 3, 6 };
            matrix[5] = new int[] { 1, 5 };

            // in every row of the second matrix only the second entry also exists in the first one
            int[][] overlap_entries = {
                new[] { 2, 1 }, new[] { 2, 5 },
                new[] { 4, 4 }, new[] { 4, 6 },
                new[] { 5, 2 }, new[] { 5, 5 },
            };
            var overlapMatrix = new SparseBooleanMatrix();
            foreach (int[] entry in overlap_entries)
                overlapMatrix[entry[0], entry[1]] = true;

            Assert.AreEqual(3, matrix.Overlap(overlapMatrix));
        }
		/// <summary>NonEmptyColumnIDs must list exactly the columns that contain at least one true entry</summary>
		[Test()] public void TestNonEmptyColumnIDs()
		{
			var matrix = new SparseBooleanMatrix();
			for (int column = 0; column < 5; column++)
			{
				// columns 2 and 3 stay empty
				if (column == 2 || column == 3)
					continue;

				matrix[1, column] = true;
				matrix[4, column] = true;
			}

			Assert.AreEqual(3, matrix.NonEmptyColumnIDs.Count);

			ICollection<int> colIDs = matrix.NonEmptyColumnIDs;
			var colIDsEnum = colIDs.GetEnumerator();

			// the IDs are expected in ascending order
			int[] expected_ids = { 0, 1, 4 };
			foreach (int expected_id in expected_ids)
			{
				colIDsEnum.MoveNext();
				Assert.AreEqual(expected_id, colIDsEnum.Current);
			}
			Assert.IsFalse(colIDsEnum.MoveNext());
		}
示例#20
0
    /// <summary>Load rating data, attribute data and relation data for the configured recommender</summary>
    /// <remarks>
    /// training_file and test_file are replaced by their paths below data_dir.
    /// The loading time and the value of Memory.Usage are written to standard error.
    /// </remarks>
    /// <param name="static_data">if true, read the training ratings with StaticRatingData instead of RatingData (DEFAULT and IGNORE_FIRST_LINE formats only)</param>
    static void LoadData(bool static_data)
    {
        training_file = Path.Combine(data_dir, training_file);

        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            // read training data; timed ratings are needed for time-aware recommenders and chronological splits
            bool wants_timed_data = recommender is TimeAwareRatingPredictor || chronological_split != null;
            if (wants_timed_data && file_format != RatingFileFormat.MOVIELENS_1M)
            {
                training_data = TimedRatingData.Read(training_file, user_mapping, item_mapping);
            }
            else
            {
                switch (file_format)
                {
                    case RatingFileFormat.DEFAULT:
                        if (static_data)
                            training_data = StaticRatingData.Read(training_file, user_mapping, item_mapping, rating_type);
                        else
                            training_data = RatingData.Read(training_file, user_mapping, item_mapping);
                        break;
                    case RatingFileFormat.IGNORE_FIRST_LINE:
                        if (static_data)
                            training_data = StaticRatingData.Read(training_file, user_mapping, item_mapping, rating_type, true);
                        else
                            training_data = RatingData.Read(training_file, user_mapping, item_mapping, true);
                        break;
                    case RatingFileFormat.MOVIELENS_1M:
                        training_data = MovieLensRatingData.Read(training_file, user_mapping, item_mapping);
                        break;
                    case RatingFileFormat.KDDCUP_2011:
                        training_data = MyMediaLite.IO.KDDCup2011.Ratings.Read(training_file);
                        break;
                }
            }
            recommender.Ratings = training_data;

            // user attributes: read if a file is given, hand over if the recommender can use them
            if (user_attributes_file != null)
            {
                user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }
            if (recommender is IUserAttributeAwareRecommender)
            {
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;
            }

            // item attributes: same scheme as for the user attributes
            if (item_attributes_file != null)
            {
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
            if (recommender is IItemAttributeAwareRecommender)
            {
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                string user_relations_path = Path.Combine(data_dir, user_relations_file);
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(user_relations_path, user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                string item_relations_path = Path.Combine(data_dir, item_relations_file);
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(item_relations_path, item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
            }

            // read test data
            if (test_file != null)
            {
                test_file = Path.Combine(data_dir, test_file);

                bool is_movielens = file_format == RatingFileFormat.MOVIELENS_1M;
                if (recommender is TimeAwareRatingPredictor && !is_movielens)
                {
                    test_data = TimedRatingData.Read(test_file, user_mapping, item_mapping);
                }
                else if (is_movielens)
                {
                    test_data = MovieLensRatingData.Read(test_file, user_mapping, item_mapping);
                }
                else if (file_format == RatingFileFormat.KDDCUP_2011)
                {
                    test_data = MyMediaLite.IO.KDDCup2011.Ratings.Read(test_file);
                }
                else
                {
                    bool skip_first_line = file_format == RatingFileFormat.IGNORE_FIRST_LINE;
                    test_data = StaticRatingData.Read(test_file, user_mapping, item_mapping, rating_type, skip_first_line);
                }
            }
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
示例#21
0
        /// <summary>
        /// Parse the command-line options, train one BPRMFAttr recommender per entry of arquivos
        /// (first on the partial "probe" data, then on the full training data),
        /// and print the evaluation results.
        /// </summary>
        /// <param name="args">the command-line arguments</param>
        protected void Run(string[] args)
        {
            Console.WriteLine("WISER-RecSys começou");

            // NOTE(review): the "arquivo=" option stores its value in arquivo, while the loops below
            // iterate over arquivos; how the two are connected is not visible here - confirm
            options = new OptionSet() {
                // string-valued options
                 { "arquivo=",            v              => arquivo             = v },
                { "measures=",            v              => measures             = v },
                { "recommender-options=", v              => recommender_options += " " + v },
                { "help",                 v => show_help         = v != null },

            };

            eval_measures = ItemRecommendationEvaluationResults.DefaultMeasuresToShow;

            IList<string> extra_args = options.Parse(args);

            if (show_help)
                Usage(0);

            // evaluation measures given on the command line replace the defaults
            if (measures != null)
                eval_measures = measures.Split(' ', ',');

            // run yours (translated from the original Portuguese note)

            // fixed input file names
            training_file = "training.data";
            test_file = "test.data";
            training_partial_file = "training.partial.data";
            test_partial_file = "test.partial.data";

            // first pass: one recommender per attribute file, trained on the partial (probe) training data
            for (int i = 0; i < arquivos.Length; i++)
            {

                // same seed before every recommender is set up and trained
                MyMediaLite.Random.Seed = 1;

                item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";

                // every recommender gets its own pair of ID mappings
                user_mapping.Add(new Mapping());
                item_mapping.Add(new Mapping());

                // set up recommender; a configuration error is printed and ends the process
                recommenders.Add("BPRMFAttr".CreateItemRecommender());
                recommenders[i].Configure(recommender_options, (string msg) =>
                {
                    Console.Error.WriteLine(msg); Environment.Exit(-1);
                });

                // item attributes
                // (item_attributes_file was assigned a concatenated string above, so it is never null here)
                if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
                    Abort("Recommender expects --item-attributes=FILE.");

                if (item_attributes_file != null)
                    item_attributes.Add(AttributeData.Read(item_attributes_file, item_mapping[i]));
                if (recommenders[i] is IItemAttributeAwareRecommender)
                    ((IItemAttributeAwareRecommender)recommenders[i]).ItemAttributes = item_attributes[i];

                // user attributes are always an empty matrix
                IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
                if (recommenders[i] is IUserAttributeAwareRecommender)
                    ((IUserAttributeAwareRecommender)recommenders[i]).UserAttributes = lista_vazia;

                // training data
                training_data.Add(ItemData.Read(training_file, user_mapping[i], item_mapping[i], false));

                test_data.Add(ItemData.Read(test_file, user_mapping[i], item_mapping[i], false));

                test_users.Add(test_data[i].AllUsers);

                // probe data (the partial training/test files)

                training_probe_data.Add(ItemData.Read(training_partial_file, user_mapping[i], item_mapping[i], false));
                test_probe_data.Add(ItemData.Read(test_partial_file, user_mapping[i], item_mapping[i], false));

                if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
                    ((ItemRecommender)recommenders[i]).Feedback = training_probe_data[i];

                // train on the probe data
                Console.WriteLine("Vamos ao probe training");
                var train_time_span = Wrap.MeasureTime(delegate () { recommenders[i].Train(); });
                Console.WriteLine("training_time " + train_time_span + " ");

            }

            Evaluation evaluation = new Evaluation(recommenders, test_probe_data, training_probe_data);

            // probe learning, with the recommenders trained on the partial data
            Console.WriteLine("Probe learn started");
            TimeSpan time_span = Wrap.MeasureTime(delegate () { evaluation.EvaluateProbe(test_users, user_mapping, item_mapping); });
            Console.WriteLine(" Probe learn time: " + time_span);

            // second pass: replace every recommender by a fresh one trained on the full training data
            for (int i = 0; i < arquivos.Length; i++)
            {

                MyMediaLite.Random.Seed = 1;

                item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";

                // set up recommender; a configuration error is printed and ends the process
                recommenders[i] = "BPRMFAttr".CreateItemRecommender();
                recommenders[i].Configure(recommender_options, (string msg) => { Console.Error.WriteLine(msg); Environment.Exit(-1); });

                // item attributes (reuses the matrices read in the first pass)
                if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
                    Abort("Recommender expects --item-attributes=FILE.");

                if (recommenders[i] is IItemAttributeAwareRecommender)
                    ((IItemAttributeAwareRecommender)recommenders[i]).ItemAttributes = item_attributes[i];

                // user attributes are always an empty matrix
                IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
                if (recommenders[i] is IUserAttributeAwareRecommender)
                    ((IUserAttributeAwareRecommender)recommenders[i]).UserAttributes = lista_vazia;

                if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
                    ((ItemRecommender)recommenders[i]).Feedback = training_data[i];

                // train on the full training data
                Console.WriteLine("Agora ao treino normal");
                var train_time_span = Wrap.MeasureTime(delegate () { recommenders[i].Train(); });
                Console.WriteLine("training_time " + train_time_span + " ");

            }

            var results = evaluation.Evaluate(test_data, training_data, test_users, user_mapping, item_mapping);

            foreach (EvaluationResults result in results)
            {
                Console.WriteLine(result.ToString());
            }

            // keep the console window open until a key is pressed
            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }
示例#22
0
        /// <summary>Learn the weights that map item attributes to latent factors</summary>
        /// <remarks>
        /// Performs num_init_mapping independently initialized training runs and then assembles
        /// the final weight matrix column by column: each column is copied from the run for which
        /// ComputeMappingFit() reported the smallest value for that column.
        /// </remarks>
        public override void LearnAttributeToFactorMapping()
        {
            random = Util.Random.GetInstance();

            // helper structure indexed as [item, user]
            this.data_item = new SparseBooleanMatrix();
            for (int index = 0; index < ratings.Count; index++)
            {
                data_item[ratings.Items[index], ratings.Users[index]] = true;
            }

            // weight matrix; the extra row/column account for the regression bias term
            // and for the item bias that we want to model
            this.attribute_to_factor = new Matrix<double>(NumItemAttributes + 1, NumFactors + 1);

            // snapshot of the learned weights of every run
            var run_weights = new Matrix<double>[num_init_mapping];

            Console.Error.WriteLine("Will use {0} examples ...", num_iter_mapping * MaxItemID);

            // per-column fit values of every run
            var run_fit = new double[num_init_mapping][];

            int run = 0;
            while (run < num_init_mapping)
            {
                MatrixUtils.InitNormal(attribute_to_factor, InitMean, InitStdev);
                Console.Error.WriteLine("----");

                int step = 0;
                while (step < num_iter_mapping * MaxItemID)
                {
                    IterateMapping();
                    step++;
                }

                run_weights[run] = new Matrix<double>(attribute_to_factor);
                run_fit[run]     = ComputeMappingFit();
                run++;
            }

            // for every column, determine the run with the smallest fit value
            // (ties are resolved in favour of the earliest run)
            int column_count = NumFactors + 1;
            var winning_run  = new int[column_count];
            for (int column = 0; column < column_count; column++)
            {
                double lowest_fit = Double.MaxValue;
                for (int candidate = 0; candidate < num_init_mapping; candidate++)
                {
                    double fit = run_fit[candidate][column];
                    if (fit < lowest_fit)
                    {
                        lowest_fit          = fit;
                        winning_run[column] = candidate;
                    }
                }
            }

            // copy the winning column of each factor into the final weight matrix
            for (int column = 0; column < column_count; column++)
            {
                int source_run = winning_run[column];
                Console.Error.WriteLine("Factor {0}, pick {1}", column, source_run);

                var best_column = run_weights[source_run].GetColumn(column);
                attribute_to_factor.SetColumn(column, best_column);
            }
        }
		/// <summary>Transposing must swap the row and column index of every entry</summary>
		[Test()]
		public void TestTranspose()
		{
			var matrix = new SparseBooleanMatrix();

			// rows 0, 1, 3, 5 and 6 get entries in columns 1 and 4
			foreach (int row in new int[] { 0, 1, 3, 5, 6 })
			{
				matrix[row, 1] = true;
				matrix[row, 4] = true;
			}

			// rows 2 and 4 get their own, different entries
			matrix[2, 2] = true;
			matrix[2, 5] = true;
			matrix[4, 3] = true;

			var transposed_matrix = (IBooleanMatrix) matrix.Transpose();

			// entries that exist in the original matrix as [0, 1] and [6, 4]
			Assert.IsTrue(transposed_matrix[1, 0]);
			Assert.IsTrue(transposed_matrix[4, 6]);

			// [1, 3] and [4, 5] were never set in the original matrix
			Assert.IsFalse(transposed_matrix[3, 1]);
			Assert.IsFalse(transposed_matrix[5, 4]);
		}
		/// <summary>Overlap() must count the entries that two matrices have in common</summary>
		[Test()]
		public void TestOverlapCount()
		{
			// (row, column) pairs of the first matrix
			int[][] first_entries = {
				new[] { 2, 2 }, new[] { 2, 5 },
				new[] { 4, 3 }, new[] { 4, 6 },
				new[] { 5, 1 }, new[] { 5, 5 }
			};

			// (row, column) pairs of the second matrix; the second pair of each line is shared with the first matrix
			int[][] second_entries = {
				new[] { 2, 1 }, new[] { 2, 5 },
				new[] { 4, 4 }, new[] { 4, 6 },
				new[] { 5, 2 }, new[] { 5, 5 }
			};

			var matrix = new SparseBooleanMatrix();
			foreach (int[] entry in first_entries)
				matrix[entry[0], entry[1]] = true;

			var overlapMatrix = new SparseBooleanMatrix();
			foreach (int[] entry in second_entries)
				overlapMatrix[entry[0], entry[1]] = true;

			Assert.AreEqual(3, matrix.Overlap(overlapMatrix));
		}
		/// <summary>NumEntriesByColumn() must report the number of entries in each column</summary>
		[Test()]
		public void TestNumEntriesByColumn()
		{
			var matrix = new SparseBooleanMatrix();

			// rows 0, 1 and 4 get entries in columns 1 and 4; rows 2 and 3 stay empty
			foreach (int row in new int[] { 0, 1, 4 })
			{
				matrix[row, 1] = true;
				matrix[row, 4] = true;
			}

			// expected number of entries for columns 0 to 4
			int[] expected_counts = { 0, 3, 0, 0, 3 };
			for (int column = 0; column < expected_counts.Length; column++)
				Assert.AreEqual(expected_counts[column], matrix.NumEntriesByColumn(column));
		}
示例#26
0
    /// <summary>Load all data needed for training and evaluation into the program's static fields</summary>
    /// <remarks>
    /// Reads the training data, the optional user/item attributes, relations and user groups, and the
    /// test data (or creates it by splitting the training data if test_ratio is not 0). Afterwards it
    /// applies the "GroupsAsUsers" transformation and the user-prediction transposition if requested,
    /// hands the training data to the recommender, and determines the test users, the candidate items
    /// and the candidate item mode. The loading time and memory usage are written to standard error.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            // training data
            training_file = Path.Combine(data_dir, training_file);
            training_data = double.IsNaN(rating_threshold)
                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // user attributes
            if (user_attributes_file != null)
                user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            if (recommender is IUserAttributeAwareRecommender)
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

            // item attributes
            if (item_attributes_file != null)
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            if (recommender is IItemAttributeAwareRecommender)
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
            }

            // user groups
            if (user_groups_file != null)
            {
                group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
                user_groups = group_to_user.NonEmptyRowIDs;
                Console.WriteLine("{0} user groups", user_groups.Count);
            }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_file = Path.Combine(data_dir, test_file);
                    test_data = double.IsNaN(rating_threshold)
                        ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                        : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                // no separate test file is used: hold out a part of the training data
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (group_method == "GroupsAsUsers")
            {
                Console.WriteLine("group recommendation strategy: {0}", group_method);
                // TODO verify what is going on here
                // NOTE(review): this branch dereferences group_to_user and test_data, which are only
                // assigned above when a user groups file and test data are available - confirm that
                // the option parsing guarantees both.

                //var training_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                // transform groups to users
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
                            training_data.Add(group_id, item_id);
                // add the users that do not belong to groups

                //training_data = training_data_group;

                // transform groups to users
                var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
                            test_data_group.Add(group_id, item_id);

                test_data = test_data_group;

                group_method = null; // deactivate s.t. the normal eval routines are used
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf = test_users_file;
                var rif = candidate_items_file;
                test_users_file = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um = user_mapping;
                var im = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                    test_data = test_data.Transpose();
            }

            if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
                ((ItemRecommender)recommender).Feedback = training_data;

            // test users
            if (test_users_file != null)
                test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
            else
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

            // if necessary, perform user sampling (without replacement)
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet<int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    // The upper bound of Next() is exclusive, so passing Count covers the indices
                    // 0 .. Count - 1; passing Count - 1 would never select the last remaining user.
                    int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
                candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
            else if (all_items)
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

            // an explicit candidate list takes precedence over the other candidate item options
            if (candidate_items != null)
                eval_item_mode = CandidateItems.EXPLICIT;
            else if (in_training_items)
                eval_item_mode = CandidateItems.TRAINING;
            else if (in_test_items)
                eval_item_mode = CandidateItems.TEST;
            else if (overlap_items)
                eval_item_mode = CandidateItems.OVERLAP;
            else
                eval_item_mode = CandidateItems.UNION;
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
示例#27
0
        /// <summary>Create a textual summary of an item recommendation dataset</summary>
        /// <remarks>
        /// The sparsity is reported as the percentage of empty cells in the user-item matrix.
        /// The attribute statistics are appended to the dataset statistics.
        /// </remarks>
        /// <param name="training_data">the training dataset</param>
        /// <param name="test_data">the test dataset; no test statistics are reported if it is null</param>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        /// <returns>a string with one line per reported dataset</returns>
        public static string Statistics(
			this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
			SparseBooleanMatrix user_attributes = null, SparseBooleanMatrix item_attributes = null)
        {
            // training data stats
            int train_user_count   = training_data.AllUsers.Count;
            int train_item_count   = training_data.AllItems.Count;
            long train_cell_count  = (long) train_user_count * train_item_count;
            long train_empty_count = train_cell_count - training_data.Count;
            double train_sparsity  = 100.0 * train_empty_count / train_cell_count;

            string report = string.Format(
                CultureInfo.InvariantCulture,
                "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
                train_user_count, train_item_count, training_data.Count, train_sparsity);

            // test data stats
            if (test_data != null)
            {
                int test_user_count   = test_data.AllUsers.Count;
                int test_item_count   = test_data.AllItems.Count;
                long test_cell_count  = (long) test_user_count * test_item_count;
                long test_empty_count = test_cell_count - test_data.Count;
                // TODO depends on the eval scheme whether this is correct
                double test_sparsity  = 100.0 * test_empty_count / test_cell_count;

                report += string.Format(
                    CultureInfo.InvariantCulture,
                    "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
                    test_user_count, test_item_count, test_data.Count, test_sparsity);
            }

            string attribute_report = Statistics(user_attributes, item_attributes);
            return report + attribute_report;
        }
示例#28
0
        /// <summary>Evaluate the item rankings that a group recommender produces for user groups</summary>
        /// <remarks>
        /// For every group, the test items of all its members are treated as the correct items.
        /// Groups without correct items, or for which every evaluated item is correct, are skipped.
        /// The measure values are accumulated over all evaluated groups.
        /// </remarks>
        /// <param name="recommender">group recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="group_to_user">group to user relation</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="ignore_overlap">if true, items that appear in the training set for a member of the group are ignored when evaluating that group</param>
        /// <returns>the evaluation results, including the number of evaluated groups and candidate items</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
			this GroupRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
			SparseBooleanMatrix group_to_user,
			ICollection<int> candidate_items,
			bool ignore_overlap = true)
        {
            var result = new ItemRecommendationEvaluationResults();
            int num_groups = 0;

            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                var members = group_to_user.GetEntriesByRow(group_id);

                // candidate items that at least one group member has in the test data
                var relevant_items = new HashSet<int>();
                foreach (int member_id in members)
                {
                    relevant_items.UnionWith(test.UserMatrix[member_id]);
                }
                relevant_items.IntersectWith(candidate_items);

                // candidate items that at least one group member has in the training data
                var seen_items = new HashSet<int>();
                foreach (int member_id in members)
                {
                    seen_items.UnionWith(train.UserMatrix[member_id]);
                }
                seen_items.IntersectWith(candidate_items);

                int num_ignored = 0;
                if (ignore_overlap)
                    num_ignored = seen_items.Count;
                int num_eval_items = candidate_items.Count - num_ignored;

                // skip all groups that have 0 or #candidate_items test items
                if (relevant_items.Count == 0 || num_eval_items == relevant_items.Count)
                    continue;

                IList<int> ranking = recommender.RankItems(members, candidate_items);
                if (ranking.Count != candidate_items.Count)
                    throw new Exception("Not all items have been ranked.");

                HashSet<int> ignore_items;
                if (ignore_overlap)
                    ignore_items = seen_items;
                else
                    ignore_items = new HashSet<int>();

                double auc  = AUC.Compute(ranking, relevant_items, ignore_items);
                double map  = PrecisionAndRecall.AP(ranking, relevant_items, ignore_items);
                double ndcg = NDCG.Compute(ranking, relevant_items, ignore_items);
                double rr   = ReciprocalRank.Compute(ranking, relevant_items, ignore_items);

                var cutoffs = new int[] { 5, 10 };
                var prec    = PrecisionAndRecall.PrecisionAt(ranking, relevant_items, ignore_items, cutoffs);
                var recall  = PrecisionAndRecall.RecallAt(ranking, relevant_items, ignore_items, cutoffs);

                // update the shared counters while holding the lock on the result object
                lock(result)
                {
                    num_groups++;
                    result["AUC"]       += (float) auc;
                    result["MAP"]       += (float) map;
                    result["NDCG"]      += (float) ndcg;
                    result["MRR"]       += (float) rr;
                    result["prec@5"]    += (float) prec[5];
                    result["prec@10"]   += (float) prec[10];
                    result["recall@5"]  += (float) recall[5];
                    result["recall@10"] += (float) recall[10];
                }

                // progress output: a dot every 1000 groups, a line break every 60000 groups
                if (num_groups % 1000 == 0)
                    Console.Error.Write(".");
                if (num_groups % 60000 == 0)
                    Console.Error.WriteLine();
            }

            result["num_groups"] = num_groups;
            result["num_lists"]  = num_groups;
            result["num_items"]  = candidate_items.Count;

            return result;
        }
示例#29
0
 /// <summary>Create a textual summary of the user and item attribute data</summary>
 /// <param name="user_attributes">the user attributes; no line is produced if null</param>
 /// <param name="item_attributes">the item attributes; no line is produced if null</param>
 /// <returns>one line per attribute matrix that is not null, or an empty string if both are null</returns>
 public static string Statistics(SparseBooleanMatrix user_attributes, SparseBooleanMatrix item_attributes)
 {
     string user_line = string.Empty;
     string item_line = string.Empty;

     if (user_attributes != null)
         user_line = string.Format(
             "{0} user attributes for {1} users, {2} assignments, {3} users with attribute assignments\n",
             user_attributes.NumberOfColumns,
             user_attributes.NumberOfRows,
             user_attributes.NumberOfEntries,
             user_attributes.NonEmptyRowIDs.Count);

     // note: the number of item attributes is taken from the non-empty columns
     if (item_attributes != null)
         item_line = string.Format(
             "{0} item attributes for {1} items, {2} assignments, {3} items with attribute assignments\n",
             item_attributes.NonEmptyColumnIDs.Count,
             item_attributes.NumberOfRows,
             item_attributes.NumberOfEntries,
             item_attributes.NonEmptyRowIDs.Count);

     return user_line + item_line;
 }
示例#30
0
        /// <summary>Read binary attribute data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Column 0 of every record is read as a string and translated to an internal entity ID via the mapping;
        /// column 1 is read as a 32-bit integer and used as the attribute ID. Further columns are ignored.
        /// </remarks>
        /// <param name="reader">an IDataReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the attribute data</returns>
        /// <exception cref="Exception">thrown if the reader provides fewer than 2 columns</exception>
        public static SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
        {
            if (reader.FieldCount < 2)
                throw new Exception("Expected at least 2 columns.");

            var matrix = new SparseBooleanMatrix();

            // Read() advances to the next record and returns true as long as there is one,
            // so the loop must run while it succeeds (the negated condition skipped all data).
            while (reader.Read())
            {
                int entity_id = mapping.ToInternalID(reader.GetString(0));
                int attr_id   = reader.GetInt32(1);

                matrix[entity_id, attr_id] = true;
            }

            return matrix;
        }
		/// <summary>NumberOfEntries must count only the entries that are set to true</summary>
		[Test()]
		public void TestNumberOfEntries()
		{
			var matrix = new SparseBooleanMatrix();

			// rows 0, 1 and 3: column 1 is set, column 4 is explicitly assigned false
			foreach (int row in new int[] { 0, 1, 3 })
			{
				matrix[row, 1] = true;
				matrix[row, 4] = false;
			}

			Assert.AreEqual(3, matrix.NumberOfEntries);
		}