/// <summary>Read binary relation data from file</summary>
/// <remarks>
/// The expected (sparse) line format is:
/// ENTITY_ID whitespace ENTITY_ID
/// for the relations that hold.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the relation data</returns>
public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
	var matrix = new SparseBooleanMatrix();

	char[] split_chars = new char[]{ '\t', ' ' };
	string line;

	// ReadLine() returns null at end of stream
	while ((line = reader.ReadLine()) != null)
	{
		// ignore empty lines
		if (line.Length == 0)
			continue;

		// RemoveEmptyEntries tolerates runs of separators (e.g. "1  2" or tab followed by space),
		// which would otherwise produce empty tokens and a spurious column-count error
		string[] tokens = line.Split(split_chars, StringSplitOptions.RemoveEmptyEntries);

		if (tokens.Length != 2)
			throw new IOException("Expected exactly two columns: " + line);

		int entity1_id = mapping.ToInternalID(int.Parse(tokens[0]));
		int entity2_id = mapping.ToInternalID(int.Parse(tokens[1]));

		matrix[entity1_id, entity2_id] = true;
	}

	return matrix;
}
/// <summary>CreateMatrix() must yield an instance of the same type as its receiver</summary>
[Test()]
public void TestCreateMatrix()
{
	var source = new SparseBooleanMatrix();
	var created = source.CreateMatrix(2, 2);
	Assert.IsInstanceOf(source.GetType(), created);
}
/// <summary>Checks the similarities computed by BinaryCosine.Create() on a small hand-built matrix</summary>
public void TestCreate()
{
	var interactions = new SparseBooleanMatrix();
	int[][] set_entries = {
		new int[] { 0, 1 }, new int[] { 0, 4 },
		new int[] { 1, 0 }, new int[] { 1, 2 }, new int[] { 1, 4 },
		new int[] { 3, 1 }, new int[] { 3, 3 }, new int[] { 3, 4 },
	};
	foreach (int[] entry in set_entries)
		interactions[entry[0], entry[1]] = true;

	var correlation_matrix = BinaryCosine.Create(interactions);
	Assert.AreEqual(4, correlation_matrix.NumberOfRows);
	Assert.IsTrue(correlation_matrix.IsSymmetric);

	// overlapping rows
	Assert.AreEqual(1 / Math.Sqrt(6), correlation_matrix[0, 1], delta);
	Assert.AreEqual(1 / Math.Sqrt(6), correlation_matrix[1, 0], delta);
	Assert.AreEqual(1 / 3d, correlation_matrix[1, 3], delta);

	// row 2 has no entries: zero similarity to every other row, one to itself
	Assert.AreEqual(0f, correlation_matrix[2, 0]);
	Assert.AreEqual(0f, correlation_matrix[2, 1]);
	Assert.AreEqual(1f, correlation_matrix[2, 2]);
	Assert.AreEqual(0f, correlation_matrix[2, 3]);
	Assert.AreEqual(0f, correlation_matrix[0, 2]);
	Assert.AreEqual(0f, correlation_matrix[1, 2]);
	Assert.AreEqual(0f, correlation_matrix[3, 2]);
}
/// <summary>NumberOfColumns must reflect the highest column index that was ever written</summary>
public void TestNumberOfColumns()
{
	var matrix = new SparseBooleanMatrix();
	for (int row = 0; row < 5; row++)
	{
		matrix[row, 1] = true;
		matrix[row, 4] = true;
	}
	// highest used column index is 4, so there are 5 columns (0..4)
	Assert.AreEqual(5, matrix.NumberOfColumns);
}
/// <summary>Precompute the per-user filtered item lists, then run the base training</summary>
public override void Train()
{
	filtered_items_by_user = new Dictionary<int, ICollection<int>>[MaxUserID + 1];
	items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

	Console.Error.WriteLine("max_user_id {0} max_item_id {1}", MaxUserID, MaxItemID);

	// fixed: arrays expose Length, not a Count property ('.Count' does not
	// compile on an array without going through ICollection/LINQ)
	for (int u = 0; u < filtered_items_by_user.Length; u++)
		filtered_items_by_user[u] = ItemsFiltered.GetFilteredItems(u, Feedback, ItemAttributes);

	base.Train();
}
/// <summary>Get the transpose of the matrix, i.e. a matrix where rows and columns are interchanged</summary>
/// <returns>the transpose of the matrix (copy)</returns>
public IMatrix<bool> Transpose()
{
	var result = new SparseBooleanMatrix();
	// every set entry (row, col) of this matrix becomes (col, row) in the result
	for (int row = 0; row < row_list.Count; row++)
		foreach (int col in this[row])
			result[col, row] = true;
	return result;
}
/// <summary>NonEmptyRows must list exactly the rows that carry at least one entry</summary>
public void TestNonEmptyRows()
{
	var matrix = new SparseBooleanMatrix();
	for (int row = 0; row < 5; row++)
	{
		if (row == 2)
			continue; // leave row 2 empty on purpose
		matrix[row, 1] = true;
		matrix[row, 4] = true;
	}

	Assert.IsTrue(matrix[0, 1]);

	// rows 0, 1, 3 and 4 carry entries
	IList<KeyValuePair<int, HashSet<int>>> non_empty_rows = matrix.NonEmptyRows;
	Assert.AreEqual(4, non_empty_rows.Count);
	// TODO test contents
}
/// <summary>Display dataset statistics</summary>
/// <param name="train">the training data</param>
/// <param name="test">the test data</param>
/// <param name="user_attributes">the user attributes</param>
/// <param name="item_attributes">the item attributes</param>
/// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
/// <returns>a string containing the statistics, one line per figure</returns>
public static string Statistics(
	this IRatings train, IRatings test = null,
	SparseBooleanMatrix user_attributes = null, SparseBooleanMatrix item_attributes = null,
	bool display_overlap = false)
{
	// training data stats
	int num_users = train.AllUsers.Count;
	int num_items = train.AllItems.Count;
	long matrix_size = (long) num_users * num_items;   // size of the full user x item matrix
	long empty_size = (long) matrix_size - train.Count; // cells without a rating
	// percentage of empty cells
	// NOTE(review): matrix_size is 0 for empty data, which makes sparsity NaN — confirm callers never pass empty ratings
	double sparsity = (double) 100L * empty_size / matrix_size;
	string s = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, train.Count, sparsity);
	if (train is ITimedRatings)
	{
		var time_train = train as ITimedRatings;
		s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_train.EarliestTime, time_train.LatestTime);
	}

	// test data stats
	if (test != null)
	{
		num_users = test.AllUsers.Count;
		num_items = test.AllItems.Count;
		matrix_size = (long) num_users * num_items;
		empty_size = (long) matrix_size - test.Count; // TODO depends on the eval scheme whether this is correct
		sparsity = (double) 100L * empty_size / matrix_size;
		s += string.Format(CultureInfo.InvariantCulture, "test data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, test.Count, sparsity);
		if (test is ITimedRatings)
		{
			var time_test = test as ITimedRatings;
			s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_test.EarliestTime, time_test.LatestTime);
		}
	}

	// count and display the overlap between train and test
	if (display_overlap && test != null)
	{
		int num_new_users = 0;
		int num_new_items = 0;
		TimeSpan seconds = Util.Wrap.MeasureTime(delegate() {
			num_new_users = test.AllUsers.Except(train.AllUsers).Count();
			num_new_items = test.AllItems.Except(train.AllItems).Count();
		});
		s += string.Format("{0} new users, {1} new items ({2} seconds)\n", num_new_users, num_new_items, seconds);
	}

	// append the attribute statistics produced by the overload taking only the attribute matrices
	return s + Statistics(user_attributes, item_attributes);
}
/// <summary>GetEntriesByRow must report two entries for filled rows and zero for untouched ones</summary>
[Test()]
public void TestGetEntriesByRow()
{
	var matrix = new SparseBooleanMatrix();
	int[] filled_rows = { 0, 1, 4 };
	foreach (int row in filled_rows)
	{
		matrix[row, 1] = true;
		matrix[row, 4] = true;
	}

	// rows 2 and 3 were never written
	for (int row = 0; row < 5; row++)
	{
		int expected = (row == 2 || row == 3) ? 0 : 2;
		Assert.AreEqual(expected, matrix.GetEntriesByRow(row).Count);
	}
}
/// <summary>IsSymmetric must track mirrored entries as they are set and cleared</summary>
[Test()]
public void TestIsSymmetric()
{
	var matrix = new SparseBooleanMatrix();

	// an empty matrix is trivially symmetric
	Assert.IsTrue(matrix.IsSymmetric);

	// a diagonal entry keeps it symmetric
	matrix[1, 1] = true;
	Assert.IsTrue(matrix.IsSymmetric);

	// an off-diagonal entry without its mirror breaks symmetry
	matrix[2, 1] = true;
	Assert.IsFalse(matrix.IsSymmetric);

	// adding the mirrored entry restores symmetry
	matrix[1, 2] = true;
	Assert.IsTrue(matrix.IsSymmetric);

	// clearing one of the pair breaks it again
	matrix[2, 1] = false;
	Assert.IsFalse(matrix.IsSymmetric);
}
/// <summary>Checks BinaryCosine.Create() by comparing values rounded to four decimal places</summary>
public void TestCreate()
{
	// build the test matrix
	var feedback = new SparseBooleanMatrix();
	int[,] positions = { { 0, 1 }, { 0, 4 }, { 1, 0 }, { 1, 2 }, { 1, 4 }, { 3, 1 }, { 3, 3 }, { 3, 4 } };
	for (int k = 0; k < positions.GetLength(0); k++)
		feedback[positions[k, 0], positions[k, 1]] = true;

	// test
	var correlation_matrix = BinaryCosine.Create(feedback);
	Assert.AreEqual(Math.Round(1 / Math.Sqrt(6), 4), Math.Round(correlation_matrix[0, 1], 4));
	Assert.AreEqual(Math.Round(1 / Math.Sqrt(6), 4), Math.Round(correlation_matrix[1, 0], 4));
	Assert.AreEqual(Math.Round(1 / 3d, 4), Math.Round(correlation_matrix[1, 3], 4));
}
/// <summary>ComputeCorrelations must fill a BinaryCosine instance with the expected similarities</summary>
public void TestComputeCorrelations()
{
	var feedback = new SparseBooleanMatrix();
	int[][] set_entries = {
		new int[] { 0, 1 }, new int[] { 0, 4 },
		new int[] { 1, 0 }, new int[] { 1, 2 }, new int[] { 1, 4 },
		new int[] { 3, 1 }, new int[] { 3, 3 }, new int[] { 3, 4 },
	};
	foreach (int[] entry in set_entries)
		feedback[entry[0], entry[1]] = true;

	var correlation = new BinaryCosine(4);
	correlation.ComputeCorrelations(feedback);

	Assert.AreEqual(1 / Math.Sqrt(6), correlation[0, 1], delta);
	Assert.AreEqual(1 / Math.Sqrt(6), correlation[1, 0], delta);
	Assert.AreEqual(1 / 3d, correlation[1, 3], delta);
}
/// <summary>NonEmptyRowIDs must enumerate exactly the IDs of the rows that carry entries</summary>
public void TestNonEmptyRowIDs()
{
	var matrix = new SparseBooleanMatrix();
	for (int i = 0; i < 5; i++)
		if (i != 2 && i != 3)
		{
			matrix[i, 1] = true;
			matrix[i, 4] = true;
		}

	// rows 0, 1 and 4 are non-empty
	ICollection<int> rowIDs = matrix.NonEmptyRowIDs;
	Assert.AreEqual(3, rowIDs.Count);

	IEnumerator<int> rowIDsEnum = rowIDs.GetEnumerator();
	Assert.IsTrue(rowIDsEnum.MoveNext());
	Assert.AreEqual(0, rowIDsEnum.Current);
	Assert.IsTrue(rowIDsEnum.MoveNext());
	Assert.AreEqual(1, rowIDsEnum.Current);
	// fixed: the original called MoveNext() twice here, stepping past the end of the
	// three-element enumeration; the test only passed because Current happened to
	// keep its last value after the failed MoveNext()
	Assert.IsTrue(rowIDsEnum.MoveNext());
	Assert.AreEqual(4, rowIDsEnum.Current);
	Assert.IsFalse(rowIDsEnum.MoveNext());
}
/// <summary>Read binary attribute data from a StreamReader</summary>
/// <remarks>
/// The expected (sparse) line format is:
/// ENTITY_ID tab/space/comma ATTRIBUTE_ID
/// for the relations that hold.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
	var attribute_data = new SparseBooleanMatrix();

	for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
	{
		// skip blank lines
		if (line.Length == 0)
			continue;

		string[] fields = line.Split(Constants.SPLIT_CHARS);
		if (fields.Length != 2)
			throw new FormatException("Expected exactly 2 columns: " + line);

		int entity_id = mapping.ToInternalID(fields[0]);
		int attribute_id = int.Parse(fields[1]);
		attribute_data[entity_id, attribute_id] = true;
	}

	return attribute_data;
}
/// <summary>Evaluation for rankings of filtered items</summary>
/// <remarks>
/// One ranked list is produced per user/attribute combination; all measures are
/// averaged over those lists ("num_lists"), not over users.
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="item_attributes">the item attributes to be used for filtering</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
public static Dictionary<string, double> Evaluate(
	IItemRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	SparseBooleanMatrix item_attributes,
	ICollection<int> relevant_users,
	ICollection<int> relevant_items)
{
	if (train.Overlap(test) > 0)
		Console.Error.WriteLine("WARNING: Overlapping train and test data");

	// attribute -> items lookup, used to build the candidate set per attribute
	SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

	// compute evaluation measures
	double auc_sum = 0;
	double map_sum = 0;
	double prec_5_sum = 0;
	double prec_10_sum = 0;
	double prec_15_sum = 0;
	double ndcg_sum = 0;

	// for counting the users and the evaluation lists
	int num_lists = 0;
	int num_users = 0;
	int last_user_id = -1;

	foreach (int user_id in relevant_users)
	{
		var filtered_items = GetFilteredItems(user_id, test, item_attributes);

		foreach (int attribute_id in filtered_items.Keys)
		{
			// TODO optimize this a bit, currently it is quite naive
			// candidate items: items carrying this attribute, restricted to the relevant items
			var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
			relevant_filtered_items.IntersectWith(relevant_items);

			// test items for this user/attribute pair, restricted to the candidates
			var correct_items = new HashSet<int>(filtered_items[attribute_id]);
			correct_items.IntersectWith(relevant_filtered_items);

			// the number of items that are really relevant for this user
			var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
			relevant_items_in_train.IntersectWith(relevant_filtered_items);
			int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count();

			// skip all users that have 0 or #relevant_filtered_items test items
			if (correct_items.Count == 0)
				continue;
			if (num_eval_items - correct_items.Count == 0)
				continue;

			// counting stats
			num_lists++;
			if (last_user_id != user_id)
			{
				last_user_id = user_id;
				num_users++;
			}

			// evaluation
			int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

			auc_sum += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
			map_sum += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
			ndcg_sum += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
			prec_5_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 5);
			prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
			prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

			// sanity check: the recommender must rank the whole candidate set
			if (prediction.Length != relevant_filtered_items.Count)
				throw new Exception("Not all items have been ranked.");

			// progress indicator on stderr
			if (num_lists % 1000 == 0)
				Console.Error.Write(".");
			if (num_lists % 20000 == 0)
				Console.Error.WriteLine();
		}
	}

	// averages are per evaluation list (user/attribute pair), not per user
	var result = new Dictionary<string, double>();
	result.Add("AUC", auc_sum / num_lists);
	result.Add("MAP", map_sum / num_lists);
	result.Add("NDCG", ndcg_sum / num_lists);
	result.Add("prec@5", prec_5_sum / num_lists);
	result.Add("prec@10", prec_10_sum / num_lists);
	result.Add("prec@15", prec_15_sum / num_lists);
	result.Add("num_users", num_users);
	result.Add("num_lists", num_lists);
	result.Add("num_items", relevant_items.Count);
	return result;
}
/// <summary>Get the transpose of the matrix, i.e. a matrix where rows and columns are interchanged</summary>
/// <returns>the transpose of the matrix (copy)</returns>
public IMatrix<bool> Transpose()
{
	var transposed = new SparseBooleanMatrix();
	int i = 0;
	// mirror each set entry (i, j) into (j, i)
	while (i < row_list.Count)
	{
		foreach (int j in this[i])
			transposed[j, i] = true;
		i++;
	}
	return transposed;
}
/// <summary>Load training/test data, relevant users/items, and attribute/relation data for item recommendation</summary>
static void LoadData()
{
	TimeSpan loading_time = Utils.MeasureTime(delegate() {
		// training data
		training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

		// relevant users and items: either read from file or default to all in the training data
		if (relevant_users_file != null)
			relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
		else
			relevant_users = training_data.AllUsers;
		if (relevant_items_file != null)
			relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
		else
			relevant_items = training_data.AllItems;

		if (! (recommender is MyMediaLite.ItemRecommendation.Random))
			((ItemRecommender)recommender).Feedback = training_data;

		// user attributes
		if (recommender is IUserAttributeAwareRecommender)
		{
			if (user_attributes_file == null)
				Usage("Recommender expects --user-attributes=FILE.");
			else
				((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
		}

		// item attributes
		if (recommender is IItemAttributeAwareRecommender)
		{
			if (item_attributes_file == null)
				Usage("Recommender expects --item-attributes=FILE.");
			else
				((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
		}
		if (filtered_eval)
		{
			if (item_attributes_file == null)
				Usage("--filtered-evaluation expects --item-attributes=FILE.");
			else
				item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
		}

		// user relation
		if (recommender is IUserRelationAwareRecommender)
			if (user_relations_file == null)
			{
				Usage("Recommender expects --user-relation=FILE.");
			}
			else
			{
				((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
				Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
			}

		// item relation
		// fixed: this branch checked user_relations_file instead of
		// item_relations_file (copy-paste bug), so a missing item-relation file
		// slipped through whenever a user-relation file was given
		if (recommender is IItemRelationAwareRecommender)
			if (item_relations_file == null)
			{
				Usage("Recommender expects --item-relation=FILE.");
			}
			else
			{
				((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
				Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
			}

		// test data: either read from a file or split off from the training data
		if (test_ratio == 0)
		{
			if (test_file != null)
				test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
		}
		else
		{
			var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
			training_data = split.Train[0];
			test_data = split.Test[0];
		}
	});
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
}
/// <summary>Overlap() on a static matrix must count the entries shared with another matrix</summary>
public void TestOverlapCount()
{
	// static matrix: rows are assigned as whole arrays
	var row25 = new int[] { 2, 5 };
	var row36 = new int[] { 3, 6 };
	var row15 = new int[] { 1, 5 };
	var static_matrix = new SparseBooleanMatrixStatic();
	static_matrix[2] = row25;
	static_matrix[4] = row36;
	static_matrix[5] = row15;

	var other = new SparseBooleanMatrix();
	other[2, 1] = true;
	other[2, 5] = true; // same entry
	other[4, 4] = true;
	other[4, 6] = true; // same entry
	other[5, 2] = true;
	other[5, 5] = true; // same entry

	// exactly one shared entry per row
	Assert.AreEqual(3, static_matrix.Overlap(other));
}
/// <summary>NonEmptyColumnIDs must enumerate exactly the IDs of columns that carry entries</summary>
[Test()]
public void TestNonEmptyColumnIDs()
{
	var matrix = new SparseBooleanMatrix();
	foreach (int col in new int[] { 0, 1, 4 })
	{
		matrix[1, col] = true;
		matrix[4, col] = true;
	}

	Assert.AreEqual(3, matrix.NonEmptyColumnIDs.Count);

	// the IDs 0, 1 and 4 are enumerated in that order
	ICollection<int> column_ids = matrix.NonEmptyColumnIDs;
	var enumerator = column_ids.GetEnumerator();
	foreach (int expected in new int[] { 0, 1, 4 })
	{
		enumerator.MoveNext();
		Assert.AreEqual(expected, enumerator.Current);
	}
	Assert.IsFalse(enumerator.MoveNext());
}
/// <summary>Load rating data (training and test) in the configured file format and attach attribute/relation data to the recommender</summary>
/// <param name="static_data">if true, read the rating data into static (non-updatable) data structures</param>
static void LoadData(bool static_data)
{
	training_file = Path.Combine(data_dir, training_file);

	TimeSpan loading_time = Wrap.MeasureTime(delegate() {
		// read training data
		// time-aware recommenders and chronological splits need timestamped ratings
		if ((recommender is TimeAwareRatingPredictor || chronological_split != null) && file_format != RatingFileFormat.MOVIELENS_1M)
		{
			training_data = TimedRatingData.Read(training_file, user_mapping, item_mapping);
		}
		else
		{
			if (file_format == RatingFileFormat.DEFAULT)
				training_data = static_data
					? StaticRatingData.Read(training_file, user_mapping, item_mapping, rating_type)
					: RatingData.Read(training_file, user_mapping, item_mapping);
			else if (file_format == RatingFileFormat.IGNORE_FIRST_LINE)
				// trailing 'true' argument skips the header line
				training_data = static_data
					? StaticRatingData.Read(training_file, user_mapping, item_mapping, rating_type, true)
					: RatingData.Read(training_file, user_mapping, item_mapping, true);
			else if (file_format == RatingFileFormat.MOVIELENS_1M)
				training_data = MovieLensRatingData.Read(training_file, user_mapping, item_mapping);
			else if (file_format == RatingFileFormat.KDDCUP_2011)
				training_data = MyMediaLite.IO.KDDCup2011.Ratings.Read(training_file);
		}
		recommender.Ratings = training_data;

		// user attributes
		if (user_attributes_file != null)
			user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
		if (recommender is IUserAttributeAwareRecommender)
			((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

		// item attributes
		if (item_attributes_file != null)
			item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
		if (recommender is IItemAttributeAwareRecommender)
			((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

		// user relation
		if (recommender is IUserRelationAwareRecommender)
		{
			((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
			Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
		}

		// item relation
		if (recommender is IItemRelationAwareRecommender)
		{
			((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
			Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
		}

		// read test data
		if (test_file != null)
		{
			test_file = Path.Combine(data_dir, test_file);

			// NOTE(review): unlike the training branch above, this one does not
			// consider chronological_split — confirm the asymmetry is intended
			if (recommender is TimeAwareRatingPredictor && file_format != RatingFileFormat.MOVIELENS_1M)
				test_data = TimedRatingData.Read(test_file, user_mapping, item_mapping);
			else if (file_format == RatingFileFormat.MOVIELENS_1M)
				test_data = MovieLensRatingData.Read(test_file, user_mapping, item_mapping);
			else if (file_format == RatingFileFormat.KDDCUP_2011)
				test_data = MyMediaLite.IO.KDDCup2011.Ratings.Read(test_file);
			else
				test_data = StaticRatingData.Read(test_file, user_mapping, item_mapping, rating_type, file_format == RatingFileFormat.IGNORE_FIRST_LINE);
		}
	});
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0:0.##}", loading_time.TotalSeconds));
	Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>Parse the command line, then for each input file train a BPRMFAttr recommender
/// on the probe (partial) data, evaluate, retrain on the full data, and print the final results</summary>
/// <param name="args">the command line arguments</param>
protected void Run(string[] args)
{
	Console.WriteLine("WISER-RecSys começou");
	options = new OptionSet() {
		// string-valued options
		{ "arquivo=",             v => arquivo = v },
		{ "measures=",            v => measures = v },
		{ "recommender-options=", v => recommender_options += " " + v },
		{ "help",                 v => show_help = v != null },
	};
	eval_measures = ItemRecommendationEvaluationResults.DefaultMeasuresToShow;
	IList<string> extra_args = options.Parse(args);

	if (show_help)
		Usage(0);

	// evaluation measures to show
	if (measures != null)
		eval_measures = measures.Split(' ', ',');

	// run on your data (translated from Portuguese: "Rodar o de vocês")
	// training_file = "training.data";
	test_file = "test.data";
	training_partial_file = "training.partial.data";
	test_partial_file = "test.partial.data";

	// first pass: set up one recommender per input file and train it on the probe (partial) data
	for (int i = 0; i < arquivos.Length; i++)
	{
		MyMediaLite.Random.Seed = 1;
		item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";
		user_mapping.Add(new Mapping());
		item_mapping.Add(new Mapping());

		// set up recommender
		recommenders.Add("BPRMFAttr".CreateItemRecommender());
		recommenders[i].Configure(recommender_options, (string msg) => { Console.Error.WriteLine(msg); Environment.Exit(-1); });

		// item attributes
		if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
			Abort("Recommender expects --item-attributes=FILE.");
		if (item_attributes_file != null)
			item_attributes.Add(AttributeData.Read(item_attributes_file, item_mapping[i]));
		if (recommenders[i] is IItemAttributeAwareRecommender)
			((IItemAttributeAwareRecommender)recommenders[i]).ItemAttributes = item_attributes[i];

		// user attributes: deliberately an empty matrix ("lista_vazia" = empty list)
		IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
		if (recommenders[i] is IUserAttributeAwareRecommender)
			((IUserAttributeAwareRecommender)recommenders[i]).UserAttributes = lista_vazia;

		// training data
		training_data.Add(ItemData.Read(training_file, user_mapping[i], item_mapping[i], false));
		test_data.Add(ItemData.Read(test_file, user_mapping[i], item_mapping[i], false));
		test_users.Add(test_data[i].AllUsers);

		// probe (partial) data
		training_probe_data.Add(ItemData.Read(training_partial_file, user_mapping[i], item_mapping[i], false));
		test_probe_data.Add(ItemData.Read(test_partial_file, user_mapping[i], item_mapping[i], false));

		if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
			((ItemRecommender)recommenders[i]).Feedback = training_probe_data[i];

		// train (translated from Portuguese: "Trainar")
		Console.WriteLine("Vamos ao probe training");
		var train_time_span = Wrap.MeasureTime(delegate () { recommenders[i].Train(); });
		Console.WriteLine("training_time " + train_time_span + " ");
	}

	Evaluation evaluation = new Evaluation(recommenders, test_probe_data, training_probe_data);

	// probe learning
	Console.WriteLine("Probe learn started");
	TimeSpan time_span = Wrap.MeasureTime(delegate () { evaluation.EvaluateProbe(test_users, user_mapping, item_mapping); });
	Console.WriteLine(" Probe learn time: " + time_span);

	// second pass: re-create each recommender (same seed) and train it on the full training data
	for (int i = 0; i < arquivos.Length; i++)
	{
		MyMediaLite.Random.Seed = 1;
		item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";

		// set up recommender
		recommenders[i] = "BPRMFAttr".CreateItemRecommender();
		recommenders[i].Configure(recommender_options, (string msg) => { Console.Error.WriteLine(msg); Environment.Exit(-1); });

		// item attributes (re-uses the matrices read in the first pass)
		if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
			Abort("Recommender expects --item-attributes=FILE.");
		if (recommenders[i] is IItemAttributeAwareRecommender)
			((IItemAttributeAwareRecommender)recommenders[i]).ItemAttributes = item_attributes[i];

		// user attributes: again an empty matrix
		IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
		if (recommenders[i] is IUserAttributeAwareRecommender)
			((IUserAttributeAwareRecommender)recommenders[i]).UserAttributes = lista_vazia;

		if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
			((ItemRecommender)recommenders[i]).Feedback = training_data[i];

		// train on the full data (translated from Portuguese: "Trainar")
		Console.WriteLine("Agora ao treino normal");
		var train_time_span = Wrap.MeasureTime(delegate () { recommenders[i].Train(); });
		Console.WriteLine("training_time " + train_time_span + " ");
	}

	// final evaluation on the full test data
	var results = evaluation.Evaluate(test_data, training_data, test_users, user_mapping, item_mapping);
	foreach (EvaluationResults result in results)
	{
		Console.WriteLine(result.ToString());
	}

	Console.WriteLine("Press any key to continue...");
	Console.ReadKey();
}
/// <summary>Learn the item attribute-to-factor mapping weights</summary>
/// <remarks>
/// Performs num_init_mapping randomly initialized training runs and then, for each
/// factor, keeps the weight column of the run with the lowest fit value (RMSE,
/// as reported by ComputeMappingFit()).
/// </remarks>
public override void LearnAttributeToFactorMapping()
{
	random = Util.Random.GetInstance();

	// create helper data structure: item x user incidence of the ratings
	this.data_item = new SparseBooleanMatrix();
	for (int i = 0; i < ratings.Count; i++)
		data_item[ratings.Items[i], ratings.Users[i]] = true;

	// create attribute-to-factor weight matrix
	// the "+ 1" rows/columns account for regression bias term, and the item bias that we want to model
	this.attribute_to_factor = new Matrix<double>(NumItemAttributes + 1, NumFactors + 1);

	// store the results of the different runs in the following array
	var old_attribute_to_factor = new Matrix<double>[num_init_mapping];

	Console.Error.WriteLine("Will use {0} examples ...", num_iter_mapping * MaxItemID);

	var old_rmse_per_factor = new double[num_init_mapping][];

	// run the mapping training num_init_mapping times from fresh random initializations
	for (int h = 0; h < num_init_mapping; h++)
	{
		MatrixUtils.InitNormal(attribute_to_factor, InitMean, InitStdev);
		Console.Error.WriteLine("----");

		for (int i = 0; i < num_iter_mapping * MaxItemID; i++)
			IterateMapping();
		// snapshot the weights and the per-factor fit of this run
		old_attribute_to_factor[h] = new Matrix<double>(attribute_to_factor);
		old_rmse_per_factor[h] = ComputeMappingFit();
	}

	// per factor, track the lowest RMSE seen and which run achieved it
	var min_rmse_per_factor = new double[NumFactors + 1];
	for (int i = 0; i <= NumFactors; i++)
		min_rmse_per_factor[i] = Double.MaxValue;
	var best_factor_init = new int[NumFactors + 1];

	// find best factor mappings:
	for (int i = 0; i < num_init_mapping; i++)
		for (int j = 0; j <= NumFactors; j++)
			if (old_rmse_per_factor[i][j] < min_rmse_per_factor[j])
			{
				min_rmse_per_factor[j] = old_rmse_per_factor[i][j];
				best_factor_init[j] = i;
			}

	// set the best weight combinations for each factor mapping
	for (int i = 0; i <= NumFactors; i++)
	{
		Console.Error.WriteLine("Factor {0}, pick {1}", i, best_factor_init[i]);
		attribute_to_factor.SetColumn(i, old_attribute_to_factor[best_factor_init[i]].GetColumn(i) );
	}
}
/// <summary>Transpose() must mirror every entry (row, col) into (col, row) and nothing else</summary>
[Test()]
public void TestTranspose()
{
	var matrix = new SparseBooleanMatrix();
	for (int row = 0; row < 7; row++)
		if (row != 2 && row != 4)
		{
			matrix[row, 1] = true;
			matrix[row, 4] = true;
		}
	matrix[2, 2] = true;
	matrix[2, 5] = true;
	matrix[4, 3] = true;

	// transpose the matrix
	var transposed = (IBooleanMatrix) matrix.Transpose();

	// entries of the original show up with swapped coordinates
	Assert.IsTrue(transposed[1, 0]);
	Assert.IsTrue(transposed[4, 6]);
	// coordinates that were never set stay false
	Assert.IsFalse(transposed[3, 1]);
	Assert.IsFalse(transposed[5, 4]);
}
/// <summary>Overlap() must count exactly the entries set in both matrices</summary>
[Test()]
public void TestOverlapCount()
{
	var first = new SparseBooleanMatrix();
	first[2, 2] = true;
	first[2, 5] = true;
	first[4, 3] = true;
	first[4, 6] = true;
	first[5, 1] = true;
	first[5, 5] = true;

	var second = new SparseBooleanMatrix();
	second[2, 1] = true;
	second[2, 5] = true; // same entry
	second[4, 4] = true;
	second[4, 6] = true; // same entry
	second[5, 2] = true;
	second[5, 5] = true; // same entry

	// one shared entry per row
	Assert.AreEqual(3, first.Overlap(second));
}
/// <summary>NumEntriesByColumn must count the set entries of each column</summary>
[Test()]
public void TestNumEntriesByColumn()
{
	var matrix = new SparseBooleanMatrix();
	foreach (int row in new int[] { 0, 1, 4 })
	{
		matrix[row, 1] = true;
		matrix[row, 4] = true;
	}

	// only columns 1 and 4 were written, three rows each
	Assert.AreEqual(0, matrix.NumEntriesByColumn(0));
	Assert.AreEqual(3, matrix.NumEntriesByColumn(1));
	Assert.AreEqual(0, matrix.NumEntriesByColumn(2));
	Assert.AreEqual(0, matrix.NumEntriesByColumn(3));
	Assert.AreEqual(3, matrix.NumEntriesByColumn(4));
}
/// <summary>Load training/test data, attributes, relations and user groups, and set up the evaluation protocol</summary>
static void LoadData()
{
	TimeSpan loading_time = Wrap.MeasureTime(delegate() {
		// training data
		training_file = Path.Combine(data_dir, training_file);
		training_data = double.IsNaN(rating_threshold)
			? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
			: ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

		// user attributes
		if (user_attributes_file != null)
			user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
		if (recommender is IUserAttributeAwareRecommender)
			((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

		// item attributes
		if (item_attributes_file != null)
			item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
		if (recommender is IItemAttributeAwareRecommender)
			((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

		// user relation
		if (recommender is IUserRelationAwareRecommender)
		{
			((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
			Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
		}

		// item relation
		if (recommender is IItemRelationAwareRecommender)
		{
			((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
			Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
		}

		// user groups
		if (user_groups_file != null)
		{
			group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
			user_groups = group_to_user.NonEmptyRowIDs;
			Console.WriteLine("{0} user groups", user_groups.Count);
		}

		// test data: either read from a file or split off from the training data
		if (test_ratio == 0)
		{
			if (test_file != null)
			{
				test_file = Path.Combine(data_dir, test_file);
				test_data = double.IsNaN(rating_threshold)
					? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
					: ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
			}
		}
		else
		{
			var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
			training_data = split.Train[0];
			test_data = split.Test[0];
		}

		if (group_method == "GroupsAsUsers")
		{
			Console.WriteLine("group recommendation strategy: {0}", group_method);
			// TODO verify what is going on here

			//var training_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
			// transform groups to users
			foreach (int group_id in group_to_user.NonEmptyRowIDs)
				foreach (int user_id in group_to_user[group_id])
					foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
						training_data.Add(group_id, item_id);
			// add the users that do not belong to groups

			//training_data = training_data_group;

			// transform groups to users
			var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
			foreach (int group_id in group_to_user.NonEmptyRowIDs)
				foreach (int user_id in group_to_user[group_id])
					foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
						test_data_group.Add(group_id, item_id);
			test_data = test_data_group;

			group_method = null; // deactivate s.t. the normal eval routines are used
		}

		if (user_prediction)
		{
			// swap file names for test users and candidate items
			var ruf = test_users_file;
			var rif = candidate_items_file;
			test_users_file = rif;
			candidate_items_file = ruf;

			// swap user and item mappings
			var um = user_mapping;
			var im = item_mapping;
			user_mapping = im;
			item_mapping = um;

			// transpose training and test data
			training_data = training_data.Transpose();

			// transpose test data
			if (test_data != null)
				test_data = test_data.Transpose();
		}

		if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
			((ItemRecommender)recommender).Feedback = training_data;

		// test users
		if (test_users_file != null)
			test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
		else
			test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

		// if necessary, perform user sampling (without replacement)
		if (num_test_users > 0 && num_test_users < test_users.Count)
		{
			var old_test_users = new HashSet<int>(test_users);
			var new_test_users = new int[num_test_users];
			for (int i = 0; i < num_test_users; i++)
			{
				// fixed: Random.Next(n) already has an exclusive upper bound, so the
				// previous 'Count - 1' argument could never sample the last element
				int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count);
				new_test_users[i] = old_test_users.ElementAt(random_index);
				old_test_users.Remove(new_test_users[i]);
			}
			test_users = new_test_users;
		}

		// candidate items
		if (candidate_items_file != null)
			candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
		else if (all_items)
			candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

		if (candidate_items != null)
			eval_item_mode = CandidateItems.EXPLICIT;
		else if (in_training_items)
			eval_item_mode = CandidateItems.TRAINING;
		else if (in_test_items)
			eval_item_mode = CandidateItems.TEST;
		else if (overlap_items)
			eval_item_mode = CandidateItems.OVERLAP;
		else
			eval_item_mode = CandidateItems.UNION;
	});
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
	Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>Display data statistics for item recommendation datasets</summary>
/// <param name="training_data">the training dataset</param>
/// <param name="test_data">the test dataset</param>
/// <param name="user_attributes">the user attributes</param>
/// <param name="item_attributes">the item attributes</param>
public static string Statistics(
	this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
	SparseBooleanMatrix user_attributes = null, SparseBooleanMatrix item_attributes = null)
{
	// statistics of the training dataset
	int user_count = training_data.AllUsers.Count;
	int item_count = training_data.AllItems.Count;
	long cell_count  = (long) user_count * item_count;
	long empty_cells = (long) cell_count - training_data.Count;
	double training_sparsity = (double) 100L * empty_cells / cell_count;

	string report = string.Format(
		CultureInfo.InvariantCulture,
		"training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
		user_count, item_count, training_data.Count, training_sparsity);

	// statistics of the test dataset, if one is given
	if (test_data != null)
	{
		user_count = test_data.AllUsers.Count;
		item_count = test_data.AllItems.Count;
		cell_count  = (long) user_count * item_count;
		empty_cells = (long) cell_count - test_data.Count;
		// TODO depends on the eval scheme whether this is correct
		double test_sparsity = (double) 100L * empty_cells / cell_count;

		report += string.Format(
			CultureInfo.InvariantCulture,
			"test data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
			user_count, item_count, test_data.Count, test_sparsity);
	}

	// append the attribute statistics (empty string if both attribute arguments are null)
	return report + Statistics(user_attributes, item_attributes);
}
/// <summary>Evaluation for rankings of items recommended to groups</summary>
/// <remarks>
/// </remarks>
/// <param name="recommender">group recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="group_to_user">group to user relation</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="ignore_overlap">if true, ignore items that appear for a group in the training set when evaluating for that user</param>
/// <returns>a dictionary containing the evaluation results</returns>
public static ItemRecommendationEvaluationResults Evaluate(
	this GroupRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	SparseBooleanMatrix group_to_user,
	ICollection<int> candidate_items,
	bool ignore_overlap = true)
{
	var result = new ItemRecommendationEvaluationResults();

	int num_groups = 0;

	foreach (int group_id in group_to_user.NonEmptyRowIDs)
	{
		var users = group_to_user.GetEntriesByRow(group_id);

		// the group's relevant items: union of the members' test items,
		// restricted to the candidate item set
		var correct_items = new HashSet<int>();
		foreach (int user_id in users)
			correct_items.UnionWith(test.UserMatrix[user_id]);
		correct_items.IntersectWith(candidate_items);

		// candidate items any group member has already seen in training;
		// these may be excluded from the ranking evaluation below
		var candidate_items_in_train = new HashSet<int>();
		foreach (int user_id in users)
			candidate_items_in_train.UnionWith(train.UserMatrix[user_id]);
		candidate_items_in_train.IntersectWith(candidate_items);

		int num_eval_items = candidate_items.Count - (ignore_overlap ? candidate_items_in_train.Count() : 0);

		// skip all groups that have 0 or #candidate_items test items
		// (ranking metrics are undefined/trivial in those cases)
		if (correct_items.Count == 0)
			continue;
		if (num_eval_items - correct_items.Count == 0)
			continue;

		IList<int> prediction_list = recommender.RankItems(users, candidate_items);
		if (prediction_list.Count != candidate_items.Count)
			throw new Exception("Not all items have been ranked.");

		var ignore_items = ignore_overlap ? candidate_items_in_train : new HashSet<int>();

		// per-group ranking metrics; accumulated (not averaged) into result below
		double auc  = AUC.Compute(prediction_list, correct_items, ignore_items);
		double map  = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
		double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
		double rr   = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
		var positions = new int[] { 5, 10 };
		var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
		var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

		// thread-safe incrementing
		lock(result)
		{
			num_groups++;

			result["AUC"]       += (float) auc;
			result["MAP"]       += (float) map;
			result["NDCG"]      += (float) ndcg;
			result["MRR"]       += (float) rr;
			result["prec@5"]    += (float) prec[5];
			result["prec@10"]   += (float) prec[10];
			result["recall@5"]  += (float) recall[5];
			result["recall@10"] += (float) recall[10];
		}

		// progress indicator on stderr
		if (num_groups % 1000 == 0)
			Console.Error.Write(".");
		if (num_groups % 60000 == 0)
			Console.Error.WriteLine();
	}

	result["num_groups"] = num_groups;
	result["num_lists"]  = num_groups;
	result["num_items"]  = candidate_items.Count;

	return result;
}
/// <summary>Display statistics for user and item attributes</summary>
/// <param name="user_attributes">the user attributes</param>
/// <param name="item_attributes">the item attributes</param>
/// <returns>a human-readable summary; the empty string if both arguments are null</returns>
public static string Statistics(SparseBooleanMatrix user_attributes, SparseBooleanMatrix item_attributes)
{
	string s = string.Empty;
	if (user_attributes != null)
		s += string.Format(
			"{0} user attributes for {1} users, {2} assignments, {3} users with attribute assignments\n",
			user_attributes.NumberOfColumns,
			user_attributes.NumberOfRows,
			user_attributes.NumberOfEntries,
			user_attributes.NonEmptyRowIDs.Count);
	if (item_attributes != null)
		// report NumberOfColumns as the attribute count, consistent with the user-attribute
		// line above (previously NonEmptyColumnIDs.Count, which silently under-counted by
		// ignoring attributes that are never assigned to any item)
		s += string.Format(
			"{0} item attributes for {1} items, {2} assignments, {3} items with attribute assignments\n",
			item_attributes.NumberOfColumns,
			item_attributes.NumberOfRows,
			item_attributes.NumberOfEntries,
			item_attributes.NonEmptyRowIDs.Count);
	return s;
}
/// <summary>Read binary attribute data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Expects the entity ID in column 0 (read as a string) and the attribute ID in column 1 (read as an integer).
/// </remarks>
/// <param name="reader">an IDataReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
public static SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
{
	if (reader.FieldCount < 2)
		throw new Exception("Expected at least 2 columns.");

	var matrix = new SparseBooleanMatrix();
	// IDataReader.Read() returns true while there are rows left.
	// The previous condition was negated ("while (!reader.Read())"), which
	// skipped every row whenever the result set was non-empty and returned
	// an empty matrix.
	while (reader.Read())
	{
		int entity_id = mapping.ToInternalID(reader.GetString(0));
		int attr_id   = reader.GetInt32(1);

		matrix[entity_id, attr_id] = true;
	}
	return matrix;
}
[Test()]
public void TestNumberOfEntries()
{
	// only rows 0, 1, and 3 receive a true entry; assigning false must not create an entry
	var matrix = new SparseBooleanMatrix();
	foreach (int row in new int[] { 0, 1, 3 })
	{
		matrix[row, 1] = true;
		matrix[row, 4] = false;
	}
	Assert.AreEqual(3, matrix.NumberOfEntries);
}