/// <summary>
/// Loads the training feedback, the relevant user/item sets, any attribute or relation
/// data required by the configured recommender, and the test data.
/// The elapsed loading time is written to standard error.
/// </summary>
/// <remarks>
/// When the recommender (or filtered evaluation) needs a file whose option was not
/// supplied, the problem is reported through <c>Usage</c> instead of reading the file.
/// </remarks>
static void LoadData()
{
    TimeSpan loading_time = Utils.MeasureTime(delegate() {
        // training data
        training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

        // relevant users and items: an explicit ID file wins, otherwise use everything in the training data
        if (relevant_users_file != null)
        {
            relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
        }
        else
        {
            relevant_users = training_data.AllUsers;
        }

        if (relevant_items_file != null)
        {
            relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        }
        else
        {
            relevant_items = training_data.AllItems;
        }

        // every recommender except the Random one receives the training feedback
        if (!(recommender is MyMediaLite.ItemRecommendation.Random))
        {
            ((ItemRecommender)recommender).Feedback = training_data;
        }

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file == null)
            {
                Usage("Recommender expects --user-attributes=FILE.");
            }
            else
            {
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file == null)
            {
                Usage("Recommender expects --item-attributes=FILE.");
            }
            else
            {
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
        }

        // filtered evaluation needs the item attributes as well, independent of the recommender type
        if (filtered_eval)
        {
            if (item_attributes_file == null)
            {
                Usage("--filtered-evaluation expects --item-attributes=FILE.");
            }
            else
            {
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
        }

        // user relation
        if (recommender is IUserRelationAwareRecommender)
        {
            if (user_relations_file == null)
            {
                Usage("Recommender expects --user-relation=FILE.");
            }
            else
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
            }
        }

        // item relation
        if (recommender is IItemRelationAwareRecommender)
        {
            // guard on the item relation file, i.e. the file that is actually read below
            if (item_relations_file == null)
            {
                Usage("Recommender expects --item-relation=FILE.");
            }
            else
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
            }
        }

        // test data: either read it from a file or split it off the training data
        if (test_ratio == 0)
        {
            if (test_file != null)
            {
                test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
            }
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data = split.Test[0];
        }
    });

    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
}
/// <summary>
/// Reads all data needed for item recommendation: training feedback, relevant users and
/// items, attribute/relation data for recommenders that use it, and test data.
/// Writes the time spent loading to standard error.
/// </summary>
/// <remarks>
/// A missing file option that the recommender (or filtered evaluation) depends on is
/// reported through <c>Usage</c>; the corresponding file is then not read.
/// </remarks>
static void LoadData()
{
    TimeSpan loading_time = Utils.MeasureTime(delegate() {
        // training data
        training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

        // relevant users and items (default: all users/items occurring in the training data)
        if (relevant_users_file != null)
        {
            relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
        }
        else
        {
            relevant_users = training_data.AllUsers;
        }

        if (relevant_items_file != null)
        {
            relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        }
        else
        {
            relevant_items = training_data.AllItems;
        }

        // the Random recommender is the only one that does not get the feedback data
        if (!(recommender is MyMediaLite.ItemRecommendation.Random))
        {
            ((ItemRecommender)recommender).Feedback = training_data;
        }

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file == null)
            {
                Usage("Recommender expects --user-attributes=FILE.");
            }
            else
            {
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file == null)
            {
                Usage("Recommender expects --item-attributes=FILE.");
            }
            else
            {
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
        }

        // item attributes for filtered evaluation
        if (filtered_eval)
        {
            if (item_attributes_file == null)
            {
                Usage("--filtered-evaluation expects --item-attributes=FILE.");
            }
            else
            {
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
        }

        // user relation (outer braces make the if/else pairing explicit)
        if (recommender is IUserRelationAwareRecommender)
        {
            if (user_relations_file == null)
            {
                Usage("Recommender expects --user-relation=FILE.");
            }
            else
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
            }
        }

        // item relation
        if (recommender is IItemRelationAwareRecommender)
        {
            // the file read below is item_relations_file, so that is the option to validate
            if (item_relations_file == null)
            {
                Usage("Recommender expects --item-relation=FILE.");
            }
            else
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
            }
        }

        // test data: from a file when no split ratio is given, otherwise split the training data
        if (test_ratio == 0)
        {
            if (test_file != null)
            {
                test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
            }
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data = split.Test[0];
        }
    });

    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
}
/// <summary>
/// Loads training and test feedback (after running the base class loader), hands the
/// training data to every item recommender, determines test users and candidate items,
/// and dumps the loaded matrices to <c>training.data</c> / <c>test.data</c> in the
/// current directory. Loading time and memory usage are written to standard error.
/// </summary>
/// <remarks>
/// <c>test_data</c> stays null when neither a test file nor a split ratio is given;
/// in that case no <c>test.data</c> file is written.
/// </remarks>
protected override void LoadData()
{
    TimeSpan loading_time = Wrap.MeasureTime(delegate() {
        base.LoadData();

        // training data (thresholded reader is used when a rating threshold is set)
        training_data = double.IsNaN(rating_threshold)
            ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
            : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

        // test data: read from file when no split ratio is given, otherwise split the training data
        if (test_ratio == 0)
        {
            if (test_file != null)
            {
                test_data = double.IsNaN(rating_threshold)
                    ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                    : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
            }
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data = split.Test[0];
        }

        if (user_prediction)
        {
            // swap file names for test users and candidate items
            var ruf = test_users_file;
            var rif = candidate_items_file;
            test_users_file = rif;
            candidate_items_file = ruf;

            // swap user and item mappings
            var um = user_mapping;
            var im = item_mapping;
            user_mapping = im;
            item_mapping = um;

            // transpose training data
            training_data = training_data.Transpose();

            // transpose test data
            if (test_data != null)
            {
                test_data = test_data.Transpose();
            }
        }

        // hand the (possibly transposed) training data to every item recommender
        for (int i = 0; i < recommenders.Count; i++)
        {
            if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
            {
                ((ItemRecommender)recommenders[i]).Feedback = training_data;
            }
        }

        // test users
        if (test_users_file != null)
        {
            test_users = user_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray());
        }
        else
        {
            test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;
        }

        // if necessary, perform user sampling (without replacement)
        if (num_test_users > 0 && num_test_users < test_users.Count)
        {
            var old_test_users = new HashSet<int>(test_users);
            var new_test_users = new int[num_test_users];
            for (int i = 0; i < num_test_users; i++)
            {
                // Pass Count, not Count - 1: with an exclusive upper bound (the System.Random.Next(int)
                // contract, which MyMediaLite.Random is assumed to follow - TODO confirm) the last
                // remaining user could otherwise never be drawn.
                int random_index = MyMediaLite.Random.GetInstance().Next(old_test_users.Count);
                new_test_users[i] = old_test_users.ElementAt(random_index);
                old_test_users.Remove(new_test_users[i]);
            }
            test_users = new_test_users;
        }

        // candidate items
        if (candidate_items_file != null)
        {
            candidate_items = item_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray());
        }
        else if (all_items)
        {
            candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();
        }

        // an explicit candidate list takes precedence over the item-mode switches
        if (candidate_items != null)
        {
            eval_item_mode = CandidateItems.EXPLICIT;
        }
        else if (in_training_items)
        {
            eval_item_mode = CandidateItems.TRAINING;
        }
        else if (in_test_items)
        {
            eval_item_mode = CandidateItems.TEST;
        }
        else if (overlap_items)
        {
            eval_item_mode = CandidateItems.OVERLAP;
        }
        else
        {
            eval_item_mode = CandidateItems.UNION;
        }
    });

    // Save files: one "row column" line per entry of the training matrix.
    List<string> linesToWrite = new List<string>();
    for (int i = 0; i < training_data.UserMatrix.NumberOfRows; i++)
    {
        IList<int> columns = training_data.UserMatrix.GetEntriesByRow(i);
        for (int j = 0; j < columns.Count; j++)
        {
            linesToWrite.Add(i.ToString() + " " + columns[j].ToString());
        }
    }
    System.IO.File.WriteAllLines("training.data", linesToWrite.ToArray());

    // Same dump for the test matrix; test_data is null when no test file/split was
    // configured, so skip the file instead of failing with a NullReferenceException.
    if (test_data != null)
    {
        linesToWrite = new List<string>();
        for (int i = 0; i < test_data.UserMatrix.NumberOfRows; i++)
        {
            IList<int> columns = test_data.UserMatrix.GetEntriesByRow(i);
            for (int j = 0; j < columns.Count; j++)
            {
                linesToWrite.Add(i.ToString() + " " + columns[j].ToString());
            }
        }
        System.IO.File.WriteAllLines("test.data", linesToWrite.ToArray());
    }

    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>
/// Loads training/test feedback, optional user and item attributes, relation data for
/// relation-aware recommenders, and optional user groups; then sets up the recommender's
/// feedback, the test users and the candidate items / evaluation item mode.
/// Loading time and memory usage are written to standard error.
/// </summary>
/// <remarks>
/// Side effects visible in this method: <c>training_file</c> and <c>test_file</c> are
/// replaced by their paths combined with <c>data_dir</c>, and <c>group_method</c> is reset
/// to null after the "GroupsAsUsers" transformation has been applied.
/// </remarks>
static void LoadData()
{
    TimeSpan loading_time = Wrap.MeasureTime(delegate() {
        // training data
        training_file = Path.Combine(data_dir, training_file);
        training_data = double.IsNaN(rating_threshold)
            ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
            : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

        // user attributes
        if (user_attributes_file != null)
        {
            user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
        }
        if (recommender is IUserAttributeAwareRecommender)
        {
            ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;
        }

        // item attributes
        if (item_attributes_file != null)
        {
            item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }
        if (recommender is IItemAttributeAwareRecommender)
        {
            ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;
        }

        // user relation
        if (recommender is IUserRelationAwareRecommender)
        {
            ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
            Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
        }

        // item relation
        if (recommender is IItemRelationAwareRecommender)
        {
            ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
            Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
        }

        // user groups
        if (user_groups_file != null)
        {
            group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping);
            // assumption: user and user group IDs are disjoint
            user_groups = group_to_user.NonEmptyRowIDs;
            Console.WriteLine("{0} user groups", user_groups.Count);
        }

        // test data: read from file when no split ratio is given, otherwise split the training data
        if (test_ratio == 0)
        {
            if (test_file != null)
            {
                test_file = Path.Combine(data_dir, test_file);
                test_data = double.IsNaN(rating_threshold)
                    ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                    : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
            }
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data = split.Test[0];
        }

        if (group_method == "GroupsAsUsers")
        {
            Console.WriteLine("group recommendation strategy: {0}", group_method);
            // TODO verify what is going on here

            // transform groups to users: every group receives the training items of each of its
            // members; the entries are added to the existing training data, so the individual
            // users' rows are kept as well
            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                foreach (int user_id in group_to_user[group_id])
                {
                    foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
                    {
                        training_data.Add(group_id, item_id);
                    }
                }
            }

            // transform groups to users for the test data, which then contains only group rows;
            // test_data is null when no test file/split was configured, so there is nothing to transform
            if (test_data != null)
            {
                var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                {
                    foreach (int user_id in group_to_user[group_id])
                    {
                        foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
                        {
                            test_data_group.Add(group_id, item_id);
                        }
                    }
                }
                test_data = test_data_group;
            }

            group_method = null; // deactivate s.t. the normal eval routines are used
        }

        if (user_prediction)
        {
            // swap file names for test users and candidate items
            var ruf = test_users_file;
            var rif = candidate_items_file;
            test_users_file = rif;
            candidate_items_file = ruf;

            // swap user and item mappings
            var um = user_mapping;
            var im = item_mapping;
            user_mapping = im;
            item_mapping = um;

            // transpose training data
            training_data = training_data.Transpose();

            // transpose test data
            if (test_data != null)
            {
                test_data = test_data.Transpose();
            }
        }

        if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
        {
            ((ItemRecommender)recommender).Feedback = training_data;
        }

        // test users
        if (test_users_file != null)
        {
            test_users = user_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray());
        }
        else
        {
            test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;
        }

        // if necessary, perform user sampling (without replacement)
        if (num_test_users > 0 && num_test_users < test_users.Count)
        {
            var old_test_users = new HashSet<int>(test_users);
            var new_test_users = new int[num_test_users];
            for (int i = 0; i < num_test_users; i++)
            {
                // Pass Count, not Count - 1: with an exclusive upper bound (the System.Random.Next(int)
                // contract, which MyMediaLite.Util.Random is assumed to follow - TODO confirm) the
                // last remaining user could otherwise never be drawn.
                int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count);
                new_test_users[i] = old_test_users.ElementAt(random_index);
                old_test_users.Remove(new_test_users[i]);
            }
            test_users = new_test_users;
        }

        // candidate items
        if (candidate_items_file != null)
        {
            candidate_items = item_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray());
        }
        else if (all_items)
        {
            candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();
        }

        // an explicit candidate list takes precedence over the item-mode switches
        if (candidate_items != null)
        {
            eval_item_mode = CandidateItems.EXPLICIT;
        }
        else if (in_training_items)
        {
            eval_item_mode = CandidateItems.TRAINING;
        }
        else if (in_test_items)
        {
            eval_item_mode = CandidateItems.TEST;
        }
        else if (overlap_items)
        {
            eval_item_mode = CandidateItems.OVERLAP;
        }
        else
        {
            eval_item_mode = CandidateItems.UNION;
        }
    });

    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    Console.Error.WriteLine("memory {0}", Memory.Usage);
}