/// <summary>Read in implicit feedback data from a TextReader</summary>
/// <remarks>
/// Blank lines are skipped. Every other line must contain at least two columns
/// (user ID, item ID) separated by a tab, a space, or a comma. Both IDs are parsed
/// as integers with the invariant culture, so the result does not depend on the
/// regional settings of the current thread.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="IOException">a non-blank line has fewer than two columns</exception>
static public IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    var split_chars = new char[] { '\t', ' ', ',' };

    // machine-readable IDs must not be parsed with the user's regional settings
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        if (line.Trim().Length == 0)
        {
            continue;
        }

        string[] tokens = line.Split(split_chars);
        if (tokens.Length < 2)
        {
            throw new IOException("Expected at least two columns: " + line);
        }

        int user_id = user_mapping.ToInternalID(int.Parse(tokens[0], invariant));
        int item_id = item_mapping.ToInternalID(int.Parse(tokens[1], invariant));
        feedback.Add(user_id, item_id);
    }

    return feedback;
}
/// <summary>Read in rating data which will be interpreted as implicit feedback data from a TextReader</summary>
/// <remarks>
/// Blank lines are skipped. Every other line needs at least three columns (user, item, rating);
/// the rating is parsed with the invariant culture and the event is kept only if the rating
/// reaches the threshold.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
static public IPosOnlyFeedback Read(TextReader reader, float rating_threshold, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
    // fall back to identity mappings when the caller supplies none
    user_mapping = user_mapping ?? new IdentityMapping();
    item_mapping = item_mapping ?? new IdentityMapping();

    if (ignore_first_line)
    {
        reader.ReadLine();
    }

    var positive_events = new PosOnlyFeedback<SparseBooleanMatrix>();

    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
    {
        if (row.Trim().Length == 0)
        {
            continue;
        }

        string[] columns = row.Split(Constants.SPLIT_CHARS);
        if (columns.Length < 3)
        {
            throw new FormatException("Expected at least 3 columns: " + row);
        }

        // both IDs are mapped before the rating is parsed and checked
        int user_id = user_mapping.ToInternalID(columns[0]);
        int item_id = item_mapping.ToInternalID(columns[1]);
        float rating = float.Parse(columns[2], CultureInfo.InvariantCulture);

        if (rating >= rating_threshold)
        {
            positive_events.Add(user_id, item_id);
        }
    }

    return positive_events;
}
/// <summary>Adding events must be visible in the user matrix, the item matrix, and the count</summary>
[Test()]
public void TestAdd()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    // eight distinct (user, item) events
    int[][] events =
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 6, 3 }, new[] { 8, 1 },
    };
    foreach (int[] pair in events)
    {
        feedback.Add(pair[0], pair[1]);
    }

    // user matrix is indexed [user, item]
    int[][] user_cells_set = { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 }, new[] { 2, 2 } };
    int[][] user_cells_unset = { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 } };
    foreach (int[] cell in user_cells_set)
    {
        Assert.IsTrue(feedback.UserMatrix[cell[0], cell[1]]);
    }
    foreach (int[] cell in user_cells_unset)
    {
        Assert.IsFalse(feedback.UserMatrix[cell[0], cell[1]]);
    }

    // item matrix is indexed [item, user]
    int[][] item_cells_set = { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 }, new[] { 2, 2 } };
    int[][] item_cells_unset = { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 } };
    foreach (int[] cell in item_cells_set)
    {
        Assert.IsTrue(feedback.ItemMatrix[cell[0], cell[1]]);
    }
    foreach (int[] cell in item_cells_unset)
    {
        Assert.IsFalse(feedback.ItemMatrix[cell[0], cell[1]]);
    }

    Assert.AreEqual(8, feedback.Count);
}
/// <summary>Read in rating data which will be interpreted as implicit feedback data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Columns 0, 1 and 2 are read as user ID, item ID and rating. A row contributes an event
/// only if its rating reaches the threshold.
/// </remarks>
/// <param name="reader">the IDataReader to be read from</param>
/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
static public IPosOnlyFeedback Read(IDataReader reader, float rating_threshold, IMapping user_mapping, IMapping item_mapping)
{
    var positive_events = new PosOnlyFeedback<SparseBooleanMatrix>();

    if (reader.FieldCount < 3)
        throw new FormatException("Expected at least 3 columns.");

    // column accessors are resolved once, before the rows are iterated
    Func<string> read_user = reader.GetStringGetter(0);
    Func<string> read_item = reader.GetStringGetter(1);
    Func<float> read_rating = reader.GetFloatGetter(2);

    while (reader.Read())
    {
        int user_id = user_mapping.ToInternalID(read_user());
        int item_id = item_mapping.ToInternalID(read_item());

        if (read_rating() >= rating_threshold)
            positive_events.Add(user_id, item_id);
    }

    return positive_events;
}
/// <summary>Checks that added events show up in both matrix views and in the event count</summary>
[Test()]
public void TestAdd()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    var added = new[]
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 6, 3 }, new[] { 8, 1 },
    };
    for (int i = 0; i < added.Length; i++)
        feedback.Add(added[i][0], added[i][1]);

    // [user, item] lookups
    foreach (var c in new[] { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 }, new[] { 2, 2 } })
        Assert.IsTrue(feedback.UserMatrix[c[0], c[1]]);
    foreach (var c in new[] { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 } })
        Assert.IsFalse(feedback.UserMatrix[c[0], c[1]]);

    // [item, user] lookups
    foreach (var c in new[] { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 }, new[] { 2, 2 } })
        Assert.IsTrue(feedback.ItemMatrix[c[0], c[1]]);
    foreach (var c in new[] { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 } })
        Assert.IsFalse(feedback.ItemMatrix[c[0], c[1]]);

    Assert.AreEqual(8, feedback.Count);
}
/// <summary>Removing an existing event lowers the count; removing a missing one leaves it unchanged</summary>
[Test()]
public void TestRemove()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    int[][] events =
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 3, 3 }, new[] { 6, 3 },
    };
    foreach (int[] pair in events)
        feedback.Add(pair[0], pair[1]);

    Assert.AreEqual(8, feedback.Count);

    // remove two events that exist
    Assert.IsTrue(feedback.UserMatrix[2, 5]);
    feedback.Remove(2, 5);
    Assert.AreEqual(7, feedback.Count);

    feedback.Remove(6, 3);
    Assert.AreEqual(6, feedback.Count);

    // remove an event that was never added
    Assert.IsFalse(feedback.UserMatrix[5, 2]);
    feedback.Remove(5, 2);
    Assert.IsFalse(feedback.UserMatrix[5, 2]);
    Assert.AreEqual(6, feedback.Count);
}
/// <summary>Read in implicit feedback data from a TextReader</summary>
/// <remarks>
/// Blank lines are skipped. Every other line must contain at least two columns
/// (user ID, item ID) separated by a tab, a space, or a comma. The IDs are parsed as
/// integers with the invariant culture so that parsing is independent of the
/// regional settings of the current thread.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="IOException">a non-blank line has fewer than two columns</exception>
public static IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    var split_chars = new char[] { '\t', ' ', ',' };

    // IDs come from a data file, not from user-facing text: parse culture-independently
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        if (line.Trim().Length == 0)
            continue;

        string[] tokens = line.Split(split_chars);
        if (tokens.Length < 2)
            throw new IOException("Expected at least two columns: " + line);

        int user_id = user_mapping.ToInternalID(int.Parse(tokens[0], invariant));
        int item_id = item_mapping.ToInternalID(int.Parse(tokens[1], invariant));
        feedback.Add(user_id, item_id);
    }

    return feedback;
}
/// <summary>Train a BPRMF recommender on a split of the given data and evaluate it on the validation and test parts</summary>
/// <remarks>
/// The data is filtered with removeUserThreshold, split with readAndSplitData, and the training
/// part is converted to positive-only feedback. Training time is measured with a Stopwatch
/// (monotonic) instead of wall-clock timestamps and written to the console in milliseconds.
/// </remarks>
/// <param name="all_data">the complete timed rating data</param>
private static void startBPRMF(ITimedRatings all_data)
{
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration Test BPRMF");

    // split sizes are decided by readAndSplitData
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    // rating values are dropped: every training event becomes a positive (user, item) pair
    IPosOnlyFeedback training_data_pos = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int index = 0; index < training_data.Users.Count; index++)
    {
        training_data_pos.Add(training_data.Users[index], training_data.Items[index]);
    }

    MyMediaLite.ItemRecommendation.BPRMF recommender = new MyMediaLite.ItemRecommendation.BPRMF();
    recommender.Feedback = training_data_pos;

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();

    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);
    Console.WriteLine("Final results in this iteration:");

    // the returned results are not used by this method
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
/// <summary>Default constructor</summary>
/// <remarks>
/// Starts with an empty additional-feedback container and assigns the default values for
/// regularization, learn rate, bias learn rate and bias regularization.
/// </remarks>
public SVDPlusPlus() : base()
{
    // in case no test data is provided
    this.AdditionalFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    // default hyperparameters
    this.Regularization = 0.015f;
    this.LearnRate = 0.001f;
    this.BiasLearnRate = 0.7f;
    this.BiasReg = 0.33f;
}
/// <summary>Default constructor</summary>
/// <remarks>
/// Initializes <c>AdditionalFeedback</c> with an empty data set and sets the default
/// hyperparameter values.
/// </remarks>
public SVDPlusPlus() : base()
{
    // an empty container is used in case no test data is provided
    var empty_feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    AdditionalFeedback = empty_feedback;

    Regularization = 0.015f;
    LearnRate      = 0.001f;
    BiasLearnRate  = 0.7f;
    BiasReg        = 0.33f;
}
/// <summary>Create a small positive-only feedback data set with four events</summary>
/// <returns>feedback containing the (user, item) pairs (0,0), (0,1), (1,0) and (1,2)</returns>
public static IPosOnlyFeedback CreatePosOnlyFeedback()
{
    var data = new PosOnlyFeedback<SparseBooleanMatrix>();

    int[][] pairs = { new[] { 0, 0 }, new[] { 0, 1 }, new[] { 1, 0 }, new[] { 1, 2 } };
    foreach (int[] pair in pairs)
    {
        data.Add(pair[0], pair[1]);
    }

    return data;
}
/// <summary>Default constructor</summary>
/// <remarks>
/// Starts with an empty additional-feedback container and assigns the default values for
/// regularization, learn rate, bias learn rate and bias regularization.
/// </remarks>
public SigmoidUserAsymmetricFactorModel() : base()
{
    // in case no test data is provided
    this.AdditionalFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    // default hyperparameters
    this.Regularization = 0.015f;
    this.LearnRate = 0.001f;
    this.BiasLearnRate = 0.7f;
    this.BiasReg = 0.33f;
}
/// <summary>Online evaluation for rankings of items</summary>
/// <remarks>
/// The evaluation protocol works as follows:
/// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
/// The sequence of users is random (<paramref name="test_users"/> is shuffled in place).
/// Users none of whose test items are among the candidate items are skipped.
/// </remarks>
/// <param name="recommender">the item recommender to be evaluated</param>
/// <param name="test">test cases</param>
/// <param name="training">training data (must be connected to the recommender's training data)</param>
/// <param name="test_users">a list of all test user IDs</param>
/// <param name="candidate_items">a list of all candidate item IDs</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
/// <exception cref="ArgumentException">the recommender is not an <see cref="IIncrementalItemRecommender"/></exception>
public static ItemRecommendationEvaluationResults EvaluateOnline(
    this IRecommender recommender,
    IPosOnlyFeedback test, IPosOnlyFeedback training,
    IList<int> test_users, IList<int> candidate_items,
    CandidateItems candidate_item_mode)
{
    var incremental_recommender = recommender as IIncrementalItemRecommender;
    if (incremental_recommender == null)
        throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

    // prepare candidate items once to avoid recreating them
    switch (candidate_item_mode)
    {
        case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
        case CandidateItems.TEST:     candidate_items = test.AllItems; break;
        case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
        case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
    }

    test_users.Shuffle();
    var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();
    foreach (int user_id in test_users)
    {
        // ByUser yields indices into the test data, not item IDs: translate them
        // before comparing against the candidate item IDs
        var user_test_items = new List<int>();
        foreach (int index in test.ByUser[user_id])
            user_test_items.Add(test.Items[index]);

        if (!candidate_items.Intersect(user_test_items).Any())
            continue;

        // prepare data
        var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
        foreach (int item_id in user_test_items)
            current_test_data.Add(user_id, item_id);

        // evaluate user
        var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
        results_by_user[user_id] = current_result;

        // update recommender
        var tuples = new List<Tuple<int, int>>();
        foreach (int item_id in user_test_items)
            tuples.Add(Tuple.Create(user_id, item_id));
        incremental_recommender.AddFeedback(tuples);
    }

    // average the per-user results
    var results = new ItemRecommendationEvaluationResults();

    foreach (int u in results_by_user.Keys)
        foreach (string measure in Items.Measures)
            results[measure] += results_by_user[u][measure];

    foreach (string measure in Items.Measures)
        results[measure] /= results_by_user.Count;

    results["num_users"] = results_by_user.Count;
    results["num_items"] = candidate_items.Count;
    results["num_lists"] = results_by_user.Count;

    return results;
}
/// <summary>Build a tiny positive-only feedback data set</summary>
/// <returns>feedback holding the (user, item) events (0,0), (0,1), (1,0) and (1,2)</returns>
public static IPosOnlyFeedback CreatePosOnlyFeedback()
{
    var result = new PosOnlyFeedback<SparseBooleanMatrix>();

    var user_ids = new[] { 0, 0, 1, 1 };
    var item_ids = new[] { 0, 1, 0, 2 };
    for (int i = 0; i < user_ids.Length; i++)
        result.Add(user_ids[i], item_ids[i]);

    return result;
}
/// <summary>Evaluate the recommender on positive-only views of the test and training data</summary>
/// <returns>the results returned by the recommender's Evaluate call</returns>
protected override EvaluationResults Evaluate()
{
    int predict_items_number = -1;

    // both data sets are wrapped as positive-only feedback before evaluation
    var test_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(test_data);
    var training_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(training_data);

    var evaluation = recommender.Evaluate(
        test_posonly, training_posonly,
        test_users, candidate_items,
        eval_item_mode, RepeatedEvents.No, predict_items_number);

    return evaluation;
}
/// <summary>Run the item-ranking evaluation on positive-only copies of the test and training data</summary>
/// <returns>the results returned by the recommender's Evaluate call</returns>
protected override EvaluationResults Evaluate()
{
    int predict_items_number = -1;

    // arguments are built inline; test data is wrapped before training data
    return recommender.Evaluate(
        new PosOnlyFeedback<SparseBooleanMatrix>(test_data),
        new PosOnlyFeedback<SparseBooleanMatrix>(training_data),
        test_users,
        candidate_items,
        eval_item_mode,
        RepeatedEvents.No,
        predict_items_number);
}
/// <summary>Convert a sequence of item rankings into MyMediaLite positive-only feedback</summary>
/// <param name="source">the item rankings to convert</param>
/// <param name="usersMap">mapping used to translate user IDs to internal IDs</param>
/// <param name="itemsMap">mapping used to translate item IDs to internal IDs</param>
/// <returns>feedback with one (user, item) event per element of <paramref name="source"/></returns>
public static PosOnlyFeedback<SparseBooleanMatrix> ToPosOnlyFeedback(this IEnumerable<ItemRanking> source, Mapping usersMap, Mapping itemsMap)
{
    var result = new PosOnlyFeedback<SparseBooleanMatrix>();

    foreach (var ranking in source)
    {
        // the user ID is mapped before the item ID
        var internalUserId = usersMap.ToInternalID(ranking.User.Id);
        var internalItemId = itemsMap.ToInternalID(ranking.Item.Id);
        result.Add(internalUserId, internalItemId);
    }

    return result;
}
/// <summary>Evaluate the recommender on positive-only views of the test and training data</summary>
/// <returns>the results returned by the recommender's Evaluate call</returns>
protected override Dictionary<string, float> Evaluate()
{
    // TODO make more configurable
    int predict_items_number = -1;
    bool repeat_eval = false;

    var test_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(test_data);
    var training_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(training_data);

    var evaluation = recommender.Evaluate(
        test_posonly, training_posonly,
        test_users, candidate_items,
        eval_item_mode, repeat_eval, predict_items_number);

    return evaluation;
}
/// <summary>Train the wrapped BPRFM recommender on the training part of a split</summary>
/// <remarks>
/// Training feedback is converted to positive-only feedback. The attributes of both training and
/// test feedback are translated with a fresh <c>FmFeatureBuilder</c> and put into group 2; if the
/// wrapped recommender is a <c>WeightedBPRFM</c>, unseen features are registered in its feature groups.
/// </remarks>
/// <param name="split">the data split to train on</param>
public override void Train(Split split)
{
    var bprfm = (BPRFM)MmlRecommenderInstance;
    var featureBuilder = new FmFeatureBuilder();
    var positiveFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    var weightedBprfm = MmlRecommenderInstance as WeightedBPRFM;

    foreach (var trainEntry in split.Train)
    {
        var internalUser = UsersMap.ToInternalID(trainEntry.User.Id);
        var internalItem = ItemsMap.ToInternalID(trainEntry.Item.Id);
        positiveFeedback.Add(internalUser, internalItem);

        // the attributes are translated so that they can be used later for training
        foreach (var attribute in trainEntry.GetAllAttributes())
        {
            attribute.Translation = featureBuilder.TranslateAttribute(attribute);

            // hard code attribute group. User is 0, item is 1, others is 2
            attribute.Group = 2;

            if (weightedBprfm == null)
                continue;
            if (weightedBprfm.FeatureGroups.ContainsKey(attribute.Translation.Item1))
                continue;
            weightedBprfm.FeatureGroups.Add(attribute.Translation.Item1, 2);
        }
    }

    foreach (var testEntry in split.Test)
    {
        // the attributes are translated so that they can be used later for training
        foreach (var attribute in testEntry.GetAllAttributes())
        {
            attribute.Translation = featureBuilder.TranslateAttribute(attribute);

            // hard code attribute group. User is 0, item is 1, others is 2
            attribute.Group = 2;

            if (weightedBprfm == null)
                continue;
            if (weightedBprfm.FeatureGroups.ContainsKey(attribute.Translation.Item1))
                continue;
            weightedBprfm.FeatureGroups.Add(attribute.Translation.Item1, 2);
        }
    }

    // hand everything the recommender needs over before training starts
    bprfm.Feedback = positiveFeedback;
    bprfm.Split = split;
    bprfm.Model = this;
    bprfm.UsersMap = UsersMap;
    bprfm.ItemsMap = ItemsMap;
    bprfm.FeatureBuilder = featureBuilder;

    Logger.Current.Trace("Training with MmlBprfmRecommender recommender...");
    var elapsed = Wrap.MeasureTime(() => { bprfm.Train(); });
    PureTrainTime = (int)elapsed.TotalMilliseconds;
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <remarks>
/// The folds are processed in parallel on clones of the recommender. Per-fold results are summed
/// under a lock and then divided by the number of folds for every measure in <c>Items.Measures</c>,
/// for "num_users" and "num_items", and (if <paramref name="compute_fit"/> is set) for "fit".
/// Other keys reported by the per-fold evaluation are left as sums.
/// </remarks>
/// <param name="recommender">an item recommender</param>
/// <param name="split">a dataset split</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public EvaluationResults DoRatingBasedRankingCrossValidation(
    this RatingPredictor recommender,
    ISplit<IRatings> split,
    IList<int> candidate_items,
    CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
    bool compute_fit = false,
    bool show_results = false)
{
    var avg_results = new ItemRecommendationEvaluationResults();

    Parallel.For(0, (int) split.NumberOfFolds, fold =>
    {
        try
        {
            var split_recommender = (RatingPredictor) recommender.Clone(); // avoid changes in recommender
            split_recommender.Ratings = split.Train[fold];
            split_recommender.Train();

            var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[fold]);
            var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[fold]);
            IList<int> test_users = test_data_posonly.AllUsers;
            var fold_results = Items.Evaluate(split_recommender, test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode);
            if (compute_fit)
                fold_results["fit"] = (float) split_recommender.ComputeFit();

            // thread-safe stats
            lock (avg_results)
            {
                foreach (var key in fold_results.Keys)
                {
                    if (avg_results.ContainsKey(key))
                        avg_results[key] += fold_results[key];
                    else
                        avg_results[key] = fold_results[key];
                }
            }

            if (show_results)
                Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
        }
        catch (Exception e)
        {
            Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
            throw;
        }
    });

    // turn the per-fold sums into averages
    bool fit_in_measures = false;
    foreach (var key in Items.Measures)
    {
        avg_results[key] /= split.NumberOfFolds;
        if ("fit".Equals(key))
            fit_in_measures = true;
    }
    avg_results["num_users"] /= split.NumberOfFolds;
    avg_results["num_items"] /= split.NumberOfFolds;

    // "fit" is accumulated per fold as well, so it has to be averaged too
    // (guarded so it is never divided twice and never touched when absent)
    if (compute_fit && !fit_in_measures && avg_results.ContainsKey("fit"))
        avg_results["fit"] /= split.NumberOfFolds;

    return avg_results;
}
/// <summary>MaxUserID and MaxItemID must report the largest IDs that were added</summary>
[Test()]
public void TestMaxUserIDMaxItemID()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    int[][] events =
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 6, 3 },
    };
    foreach (int[] pair in events)
        feedback.Add(pair[0], pair[1]);

    // largest user ID is 6, largest item ID is 8
    Assert.AreEqual(6, feedback.MaxUserID);
    Assert.AreEqual(8, feedback.MaxItemID);
}
/// <summary>Handle a click on the "process" button: validate the input, train a KNN recommender and show its recommendations</summary>
/// <remarks>
/// The user ID and the requested number of recommendations are read from the edit controls.
/// Invalid input (empty or non-numeric user ID, unknown user ID, non-numeric count, unknown
/// recommender type) is reported in a message box instead of throwing from the event handler.
/// </remarks>
/// <param name="sender">the event source</param>
/// <param name="e">the event arguments</param>
private void btnProcess_ItemClick(object sender, ItemClickEventArgs e)
{
    // Validating
    if (editUserID.EditValue.ToString().IsEmpty())
    {
        XtraMessageBox.Show(this, "Vui lòng nhập ID User", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // TryParse: text typed by the user must not crash the handler with a FormatException
    int uid;
    if (!int.TryParse(editUserID.EditValue.ToString(), out uid))
    {
        XtraMessageBox.Show(this, "User ID không hợp lệ", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    int count;
    if (!int.TryParse(editCount.EditValue.ToString(), out count))
    {
        XtraMessageBox.Show(this, "Số lượng không hợp lệ", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // the user must occur in the known relations
    if (relas.All(r => r.UserID != uid))
    {
        XtraMessageBox.Show(this, "User ID không hợp lệ", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    ItemRecommender recommender;
    string type = cbType.EditValue.ToString();
    if (type.Equals("User Based"))
    {
        recommender = new UserKNN();
    }
    else if (type.Equals("Item Based"))
    {
        recommender = new ItemKNN();
    }
    else
    {
        XtraMessageBox.Show(this, "Invalid value " + cbType.EditValue, "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
        return;
    }

    // every relation becomes one positive (user, song) event
    var mat = new PosOnlyFeedback<SparseBooleanMatrix>();
    foreach (var rela in relas)
    {
        mat.Add(rela.UserID, rela.SongID);
    }

    recommender.Feedback = mat;
    recommender.Train();

    var result = recommender.Recommend(uid, count);

    // NOTE(review): songs.Find returns null for an unknown ID, which would throw here — confirm every recommended ID exists in songs
    gridResult.DataSource = result.Select(i => new KetQua
    {
        BaiHat = songs.Find(song => song.ID == i.Item1).Name,
        DiemSo = i.Item2
    })
    .ToList();
    gridResult.RefreshDataSource();
}
/// <summary>Checks the maximum user and item IDs reported after adding seven events</summary>
[Test()]
public void TestMaxUserIDMaxItemID()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    var user_ids = new[] { 1, 1, 2, 2, 2, 3, 6 };
    var item_ids = new[] { 4, 8, 4, 2, 5, 7, 3 };
    for (int i = 0; i < user_ids.Length; i++)
        feedback.Add(user_ids[i], item_ids[i]);

    Assert.AreEqual(6, feedback.MaxUserID);
    Assert.AreEqual(8, feedback.MaxItemID);
}
/// <summary>AllItems must contain each distinct item ID once</summary>
[Test()]
public void TestAllItems()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    // eight events over six distinct items (2, 3, 4, 5, 7, 8)
    int[][] events =
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 3, 3 }, new[] { 6, 3 },
    };
    foreach (int[] pair in events)
        feedback.Add(pair[0], pair[1]);

    Assert.AreEqual(6, feedback.AllItems.Count);
}
/// <summary>Read in implicit feedback data from a TextReader</summary>
/// <remarks>
/// Blank lines are skipped. Every other line needs at least two columns (user, item).
/// Any failure while mapping or storing a line is reported as a <see cref="FormatException"/>
/// that carries the original exception as its inner exception.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="FormatException">a line has fewer than two columns or could not be processed</exception>
static public IPosOnlyFeedback Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
    if (user_mapping == null)
    {
        user_mapping = new IdentityMapping();
    }
    if (item_mapping == null)
    {
        item_mapping = new IdentityMapping();
    }
    if (ignore_first_line)
    {
        reader.ReadLine();
    }

    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        if (line.Trim().Length == 0)
        {
            continue;
        }

        string[] tokens = line.Split(Constants.SPLIT_CHARS);
        if (tokens.Length < 2)
        {
            throw new FormatException("Expected at least 2 columns: " + line);
        }

        try
        {
            int user_id = user_mapping.ToInternalID(tokens[0]);
            int item_id = item_mapping.ToInternalID(tokens[1]);
            feedback.Add(user_id, item_id);
        }
        catch (Exception e)
        {
            // keep the root cause available to callers instead of discarding it
            throw new FormatException(string.Format("Could not read line '{0}'", line), e);
        }
    }

    return feedback;
}
/// <summary>Read in rating data which will be interpreted as implicit feedback data from a TextReader</summary>
/// <remarks>
/// Blank lines are skipped. Every other line needs at least three columns (user, item, rating).
/// The rating is parsed with the invariant culture; the event is stored only if the rating
/// is at least <paramref name="rating_threshold"/>.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
static public IPosOnlyFeedback Read(TextReader reader, float rating_threshold, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
    // default to identity mappings
    user_mapping = user_mapping ?? new IdentityMapping();
    item_mapping = item_mapping ?? new IdentityMapping();

    if (ignore_first_line)
        reader.ReadLine();

    var accepted = new PosOnlyFeedback<SparseBooleanMatrix>();

    while (true)
    {
        string current = reader.ReadLine();
        if (current == null)
            break;

        if (current.Trim().Length == 0)
            continue;

        string[] fields = current.Split(Constants.SPLIT_CHARS);
        if (fields.Length < 3)
            throw new FormatException("Expected at least 3 columns: " + current);

        // IDs are mapped first, then the rating is parsed and compared
        int user_id = user_mapping.ToInternalID(fields[0]);
        int item_id = item_mapping.ToInternalID(fields[1]);
        float rating = float.Parse(fields[2], CultureInfo.InvariantCulture);

        if (rating >= rating_threshold)
            accepted.Add(user_id, item_id);
    }

    return accepted;
}
/// <summary>Convert the training part of a split to the MyMediaLite format matching <c>DataType</c> and train the wrapped recommender</summary>
/// <remarks>
/// Ratings become <c>Ratings</c>, time-aware ratings become <c>TimedRatings</c> (the "timestamp"
/// attribute is read as a number of days added to 1998-11-01), and everything else becomes
/// positive-only feedback. The timestamp is parsed with the invariant culture so that the
/// result does not depend on the regional settings of the current thread.
/// </remarks>
/// <param name="split">the data split to train on</param>
public override void Train(Split split)
{
    // Convert trainset to MyMediaLite trainset format
    if (DataType == IO.DataType.Ratings)
    {
        var mmlFeedback = new Ratings();
        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;
            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value);
        }
        ((IRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else if (DataType == IO.DataType.TimeAwareRating)
    {
        var mmlFeedback = new TimedRatings();
        var firstRatingMl10M = new DateTime(1998, 11, 1);
        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;

            // data-file value: parse culture-independently ("1.5" must not fail or be misread on comma-decimal locales)
            double dayOffset = double.Parse(
                feedback.Attributes["timestamp"].Value,
                System.Globalization.CultureInfo.InvariantCulture);
            var time = firstRatingMl10M.AddDays(dayOffset);

            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value, time);
        }
        ((ITimeAwareRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else
    {
        var mmlFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();
        foreach (var feedback in split.Train)
        {
            mmlFeedback.Add(UsersMap.ToInternalID(feedback.User.Id), ItemsMap.ToInternalID(feedback.Item.Id));
        }
        ((ItemRecommender)MmlRecommenderInstance).Feedback = mmlFeedback;

        // model-aware recommenders get a reference back to this model
        var modelAware = MmlRecommenderInstance as IModelAwareRecommender;
        if (modelAware != null)
        {
            modelAware.Model = this;
        }
    }

    Logger.Current.Trace("Training with MyMediaLite recommender...");
    PureTrainTime = (int)Wrap.MeasureTime(delegate() { MmlRecommenderInstance.Train(); }).TotalMilliseconds;
}
/// <summary>Read in implicit feedback data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>Columns 0 and 1 are read as 32-bit integer user and item IDs.</remarks>
/// <param name="reader">the IDataReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="IOException">the reader exposes fewer than two columns</exception>
static public IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var events = new PosOnlyFeedback<SparseBooleanMatrix>();

    bool has_enough_columns = reader.FieldCount >= 2;
    if (!has_enough_columns)
        throw new IOException("Expected at least two columns.");

    while (reader.Read())
    {
        // user column first, then item column
        int raw_user = reader.GetInt32(0);
        int user_id = user_mapping.ToInternalID(raw_user);

        int raw_item = reader.GetInt32(1);
        int item_id = item_mapping.ToInternalID(raw_item);

        events.Add(user_id, item_id);
    }

    return events;
}
/// <summary>Train a UserKNN recommender on a split of the given data and evaluate it on the validation and test parts</summary>
/// <remarks>
/// The data is loaded with readDataMapped, filtered with removeUserThreshold, split with
/// readAndSplitData, and the training part is converted to positive-only feedback. Training time
/// is measured with a Stopwatch (monotonic) instead of wall-clock timestamps and written to the
/// console in milliseconds.
/// </remarks>
/// <param name="data">the data source handed to readDataMapped</param>
private static void startUserKNN(string data)
{
    MyMediaLite.Data.Mapping user_mapping = new MyMediaLite.Data.Mapping();
    MyMediaLite.Data.Mapping item_mapping = new MyMediaLite.Data.Mapping();
    ITimedRatings all_data = readDataMapped(data, ref user_mapping, ref item_mapping);
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration Test UserKNN");

    // split sizes are decided by readAndSplitData
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    // rating values are dropped: every training event becomes a positive (user, item) pair
    IPosOnlyFeedback training_data_pos = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int index = 0; index < training_data.Users.Count; index++)
    {
        training_data_pos.Add(training_data.Users[index], training_data.Items[index]);
    }

    MyMediaLite.ItemRecommendation.UserKNN recommender = new MyMediaLite.ItemRecommendation.UserKNN();
    recommender.K = 80;
    recommender.Q = 1;
    recommender.Weighted = false;
    recommender.Alpha = 0.5f;
    recommender.Correlation = MyMediaLite.Correlation.BinaryCorrelationType.Jaccard;
    recommender.Feedback = training_data_pos;

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();

    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);
    Console.WriteLine("Final results in this iteration:");

    // the returned results are not used by this method
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
/// <summary>Create the training data, a trained MostPopular recommender, the test data, and the user/item ID lists used by the tests</summary>
public void SetUp()
{
    // training events as (user, item) pairs
    training_data = new PosOnlyFeedback<SparseBooleanMatrix>();
    int[][] training_events =
    {
        new[] { 1, 1 }, new[] { 1, 2 }, new[] { 2, 2 },
        new[] { 2, 3 }, new[] { 3, 1 }, new[] { 3, 2 },
    };
    foreach (int[] pair in training_events)
        training_data.Add(pair[0], pair[1]);

    recommender = new MostPopular() { Feedback = training_data };
    recommender.Train();

    // test events as (user, item) pairs
    test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
    int[][] test_events = { new[] { 2, 3 }, new[] { 2, 4 }, new[] { 4, 4 } };
    foreach (int[] pair in test_events)
        test_data.Add(pair[0], pair[1]);

    // users 1..4 and items 1..5
    all_users = Enumerable.Range(1, 4).ToList();
    candidate_items = Enumerable.Range(1, 5).ToList();
}
/// <summary>GetItemMatrixCopy must return the item matrix contents, and changes to the copy must not reach the feedback object</summary>
[Test()]
public void TestGetItemMatrixCopy()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    int[][] events =
    {
        new[] { 1, 4 }, new[] { 1, 8 }, new[] { 2, 4 }, new[] { 2, 2 },
        new[] { 2, 5 }, new[] { 3, 7 }, new[] { 6, 3 }, new[] { 8, 1 },
    };
    foreach (int[] pair in events)
        feedback.Add(pair[0], pair[1]);

    // cells are [item, user]
    int[][] set_cells = { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 }, new[] { 2, 2 } };
    int[][] unset_cells = { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 } };

    var item_matrix = feedback.GetItemMatrixCopy();

    // check whether we got the item matrix
    foreach (int[] cell in set_cells)
        Assert.IsTrue(item_matrix[cell[0], cell[1]]);
    foreach (int[] cell in unset_cells)
        Assert.IsFalse(item_matrix[cell[0], cell[1]]);

    // check de-coupling
    item_matrix[5, 2] = false;
    Assert.IsFalse(item_matrix[5, 2]);

    foreach (int[] cell in set_cells)
        Assert.IsTrue(feedback.ItemMatrix[cell[0], cell[1]]);
    foreach (int[] cell in unset_cells)
        Assert.IsFalse(feedback.ItemMatrix[cell[0], cell[1]]);

    Assert.AreEqual(8, feedback.Count);
}
/// <summary>Checks that the item matrix copy has the expected entries and is independent of the feedback's own item matrix</summary>
[Test()]
public void TestGetItemMatrixCopy()
{
    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

    var user_ids = new[] { 1, 1, 2, 2, 2, 3, 6, 8 };
    var item_ids = new[] { 4, 8, 4, 2, 5, 7, 3, 1 };
    for (int i = 0; i < user_ids.Length; i++)
        feedback.Add(user_ids[i], item_ids[i]);

    var expected_true = new[] { new[] { 5, 2 }, new[] { 4, 1 }, new[] { 3, 6 }, new[] { 2, 2 } };
    var expected_false = new[] { new[] { 2, 5 }, new[] { 1, 4 }, new[] { 6, 3 } };

    var item_matrix = feedback.GetItemMatrixCopy();

    // check whether we got the item matrix
    for (int i = 0; i < expected_true.Length; i++)
        Assert.IsTrue(item_matrix[expected_true[i][0], expected_true[i][1]]);
    for (int i = 0; i < expected_false.Length; i++)
        Assert.IsFalse(item_matrix[expected_false[i][0], expected_false[i][1]]);

    // check de-coupling: clearing a cell in the copy must not affect the original
    item_matrix[5, 2] = false;
    Assert.IsFalse(item_matrix[5, 2]);

    for (int i = 0; i < expected_true.Length; i++)
        Assert.IsTrue(feedback.ItemMatrix[expected_true[i][0], expected_true[i][1]]);
    for (int i = 0; i < expected_false.Length; i++)
        Assert.IsFalse(feedback.ItemMatrix[expected_false[i][0], expected_false[i][1]]);

    Assert.AreEqual(8, feedback.Count);
}
/// <summary>Rebuild the ID mappings from the training data, convert it to positive-only feedback and train the recommender</summary>
/// <remarks>
/// Real user and item IDs are mapped to consecutive internal IDs starting at 0, in the order
/// in which they are first encountered; the reverse mappings are stored as well.
/// </remarks>
/// <param name="trainingData">the per-user positive feedback to train on</param>
public void Train(IBasicTrainingData<IPositiveFeedbackForUser> trainingData)
{
    // start from empty mappings on every call
    m_realUserIdToMediaLiteUserId = new Dictionary<int, int>();
    m_mediaLiteUserIdToRealUserId = new Dictionary<int, int>();
    m_nextMediaLiteUserId = 0;

    m_realItemIdToMediaLiteItemId = new Dictionary<int, int>();
    m_mediaLiteItemIdToRealItemId = new Dictionary<int, int>();
    m_nextMediaLiteItemId = 0;

    var converted = new PosOnlyFeedback<SparseBooleanMatrix>();

    foreach (KeyValuePair<int, IPositiveFeedbackForUser> entry in trainingData.Users)
    {
        int realUserId = entry.Key;

        // each user gets the next free internal ID
        int internalUserId = m_nextMediaLiteUserId;
        m_realUserIdToMediaLiteUserId[realUserId] = internalUserId;
        m_mediaLiteUserIdToRealUserId[internalUserId] = realUserId;
        m_nextMediaLiteUserId++;

        foreach (int realItemId in entry.Value.Items)
        {
            // items get an internal ID the first time they are seen
            int internalItemId;
            if (!m_realItemIdToMediaLiteItemId.TryGetValue(realItemId, out internalItemId))
            {
                internalItemId = m_nextMediaLiteItemId;
                m_realItemIdToMediaLiteItemId[realItemId] = internalItemId;
                m_mediaLiteItemIdToRealItemId[internalItemId] = realItemId;
                m_nextMediaLiteItemId++;
            }

            converted.Add(internalUserId, internalItemId);
        }
    }

    m_recommender.Feedback = converted;
    m_recommender.Train();
}
/// <summary>Prepare training data, a trained MostPopular recommender, test data, and the user/item ID lists for the tests</summary>
public void SetUp()
{
    training_data = new PosOnlyFeedback<SparseBooleanMatrix>();
    var training_users = new[] { 1, 1, 2, 2, 3, 3 };
    var training_items = new[] { 1, 2, 2, 3, 1, 2 };
    for (int i = 0; i < training_users.Length; i++)
        training_data.Add(training_users[i], training_items[i]);

    recommender = new MostPopular() { Feedback = training_data };
    recommender.Train();

    test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
    var test_users = new[] { 2, 2, 4 };
    var test_items = new[] { 3, 4, 4 };
    for (int i = 0; i < test_users.Length; i++)
        test_data.Add(test_users[i], test_items[i]);

    // users 1..4 and items 1..5
    all_users = Enumerable.Range(1, 4).ToList();
    candidate_items = Enumerable.Range(1, 5).ToList();
}
/// <summary>Turn rating data into positive-only feedback by keeping the ratings that reach a threshold</summary>
/// <remarks>
/// For every user ID from 0 to <c>ratings.MaxUserID</c>, the items whose rating is greater than or
/// equal to <paramref name="threshold"/> form that user's row. The number of kept ratings is
/// written to STDERR.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="threshold">the minimum rating value (inclusive) for an event to be kept</param>
/// <returns>the positive-only feedback built from the kept ratings</returns>
static IPosOnlyFeedback CreateFeedback(IRatings ratings, double threshold)
{
    SparseBooleanMatrixStatic user_item_matrix = new SparseBooleanMatrixStatic();

    for (int u = 0; u <= ratings.MaxUserID; u++)
    {
        var items = new List<int>();

        foreach (int index in ratings.ByUser[u])
        {
            if (ratings[index] >= threshold)
            {
                items.Add(ratings.Items[index]);
            }
        }

        // a row is assigned for every user, even if it is empty
        user_item_matrix[u] = items.ToArray();
    }

    var feedback = new PosOnlyFeedback<SparseBooleanMatrixStatic>(user_item_matrix);

    // the filter above is inclusive, so the message has to say ">="
    Console.Error.WriteLine("{0} ratings >= {1}", feedback.Count, threshold);
    return feedback;
}
/// <summary>Read in implicit feedback data from a TextReader</summary>
/// <remarks>
/// Each non-blank line must contain at least two columns (user ID, item ID), separated by one of
/// Constants.SPLIT_CHARS; additional columns are ignored.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="FormatException">
/// thrown if a line has fewer than two columns, or if its IDs cannot be processed
/// (the original error is then available as the inner exception)
/// </exception>
public static IPosOnlyFeedback Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();
	if (ignore_first_line)
		reader.ReadLine();

	var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

	string line;
	while ((line = reader.ReadLine()) != null)
	{
		// skip blank lines
		if (line.Trim().Length == 0)
			continue;

		string[] tokens = line.Split(Constants.SPLIT_CHARS);

		if (tokens.Length < 2)
			throw new FormatException("Expected at least 2 columns: " + line);

		try
		{
			int user_id = user_mapping.ToInternalID(tokens[0]);
			int item_id = item_mapping.ToInternalID(tokens[1]);
			feedback.Add(user_id, item_id);
		}
		catch (Exception e)
		{
			// keep the root cause attached instead of discarding it
			throw new FormatException(string.Format("Could not read line '{0}'", line), e);
		}
	}

	return feedback;
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <remarks>
/// One clone of the recommender is trained per fold. The folds are processed in parallel, and after
/// every iteration the results aggregated over all folds are printed.
/// </remarks>
/// <param name="recommender">a rating predictor; must implement IIterativeModel</param>
/// <param name="split">a rating dataset split</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval: folds are re-evaluated only in iterations divisible by this value</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDOUT</param>
/// <exception cref="ArgumentException">thrown if the recommender does not implement IIterativeModel</exception>
static public void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	ISplit<IRatings> split,
	IList<int> test_users,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	if (!(recommender is IIterativeModel))
		throw new ArgumentException("recommender must be of type IIterativeModel");

	var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
	var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
	var fold_results = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

	// initial training and evaluation
	Parallel.For(0, (int) split.NumberOfFolds, i =>
	{
		try
		{
			split_recommenders[i] = (RatingPredictor) recommender.Clone(); // to avoid changes in recommender
			split_recommenders[i].Ratings = split.Train[i];
			split_recommenders[i].Train();
			iterative_recommenders[i] = (IIterativeModel) split_recommenders[i];

			var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[i]);
			var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[i]);
			fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
			// print this fold's results, not the whole results array
			if (show_fold_results)
				Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
		}
		catch (Exception e)
		{
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});
	Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

	// iterative training and evaluation
	for (int it = (int) iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
	{
		Parallel.For(0, (int) split.NumberOfFolds, i =>
		{
			try
			{
				iterative_recommenders[i].Iterate();

				if (it % find_iter == 0)
				{
					var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[i]);
					var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[i]);

					fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
					// print this fold's results, not the whole results array
					if (show_fold_results)
						Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
				}
			}
			catch (Exception e)
			{
				Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
				throw;
			}
		});
		// printed every iteration; folds keep their last evaluated results between report intervals
		Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
	}
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <remarks>
/// One clone of the recommender is trained per fold. The folds are processed in parallel, and after
/// every iteration the results aggregated over all folds are printed.
/// </remarks>
/// <param name="recommender">a rating predictor; must implement IIterativeModel</param>
/// <param name="split">a rating dataset split</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval: folds are re-evaluated only in iterations divisible by this value</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDOUT</param>
/// <exception cref="ArgumentException">thrown if the recommender does not implement IIterativeModel</exception>
static public void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	ISplit <IRatings> split,
	IList <int> test_users,
	IList <int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	if (!(recommender is IIterativeModel))
	{
		throw new ArgumentException("recommender must be of type IIterativeModel");
	}

	var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
	var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
	var fold_results           = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

	// initial training and evaluation
	Parallel.For(0, (int)split.NumberOfFolds, i =>
	{
		try
		{
			split_recommenders[i] = (RatingPredictor)recommender.Clone(); // to avoid changes in recommender
			split_recommenders[i].Ratings = split.Train[i];
			split_recommenders[i].Train();
			iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];

			var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
			var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);
			fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
			if (show_fold_results)
			{
				// print this fold's results, not the whole results array
				Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
			}
		}
		catch (Exception e)
		{
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});
	Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

	// iterative training and evaluation
	for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
	{
		Parallel.For(0, (int)split.NumberOfFolds, i =>
		{
			try
			{
				iterative_recommenders[i].Iterate();

				if (it % find_iter == 0)
				{
					var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
					var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);

					fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
					if (show_fold_results)
					{
						// print this fold's results, not the whole results array
						Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
					}
				}
			}
			catch (Exception e)
			{
				Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
				throw;
			}
		});
		// printed every iteration; folds keep their last evaluated results between report intervals
		Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
	}
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <remarks>
/// A clone of the recommender is trained and evaluated for every fold (in parallel); the test users of
/// a fold are the users occurring in its test data. The per-fold results are summed up and then divided
/// by the number of folds.
/// </remarks>
/// <param name="recommender">a rating predictor</param>
/// <param name="split">a rating dataset split</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print per-fold results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public EvaluationResults DoRatingBasedRankingCrossValidation(
	this RatingPredictor recommender,
	ISplit <IRatings> split,
	IList <int> candidate_items,
	CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
	bool compute_fit  = false,
	bool show_results = false)
{
	var avg_results = new ItemRecommendationEvaluationResults();

	Parallel.For(0, (int)split.NumberOfFolds, fold =>
	{
		try
		{
			var split_recommender = (RatingPredictor)recommender.Clone(); // avoid changes in recommender
			split_recommender.Ratings = split.Train[fold];
			split_recommender.Train();

			var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[fold]);
			var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[fold]);
			IList <int> test_users    = test_data_posonly.AllUsers;
			var fold_results = Items.Evaluate(split_recommender, test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode);
			if (compute_fit)
			{
				fold_results["fit"] = (float)split_recommender.ComputeFit();
			}

			// thread-safe stats
			lock (avg_results)
			{
				foreach (var key in fold_results.Keys)
				{
					if (avg_results.ContainsKey(key))
					{
						avg_results[key] += fold_results[key];
					}
					else
					{
						avg_results[key] = fold_results[key];
					}
				}
			}

			if (show_results)
			{
				Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
			}
		}
		catch (Exception e)
		{
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});

	// turn the sums into averages
	foreach (var key in Items.Measures)
	{
		avg_results[key] /= split.NumberOfFolds;
	}
	avg_results["num_users"] /= split.NumberOfFolds;
	avg_results["num_items"] /= split.NumberOfFolds;
	// "fit" is accumulated per fold like the other values, so it has to be averaged as well;
	// the key only exists if at least one fold was evaluated
	if (compute_fit && avg_results.ContainsKey("fit"))
	{
		avg_results["fit"] /= split.NumberOfFolds;
	}

	return(avg_results);
}
/// <summary>Load implicit feedback from an IDataReader (for instance a DbDataReader on top of a database query)</summary>
/// <remarks>Column 0 is read as the user ID and column 1 as the item ID, both as 32-bit integers.</remarks>
/// <param name="reader">the IDataReader delivering the rows</param>
/// <param name="user_mapping">the <see cref="IEntityMapping"/> used to translate user IDs</param>
/// <param name="item_mapping">the <see cref="IEntityMapping"/> used to translate item IDs</param>
/// <returns>the user-wise collaborative data as an <see cref="IPosOnlyFeedback"/> object</returns>
public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
	var result = new PosOnlyFeedback<SparseBooleanMatrix>();

	bool has_enough_columns = reader.FieldCount >= 2;
	if (!has_enough_columns)
	{
		throw new IOException("Expected at least two columns.");
	}

	while (reader.Read())
	{
		// arguments are evaluated left to right: user first, then item
		result.Add(
			user_mapping.ToInternalID(reader.GetInt32(0)),
			item_mapping.ToInternalID(reader.GetInt32(1)));
	}

	return result;
}
/// <summary>Load all data needed by the program and report loading time and memory usage on STDERR</summary>
/// <remarks>
/// In this order: training data, user/item attributes, user/item relations, user groups, test data
/// (from a file, or split off the training data if test_ratio is not 0), the "GroupsAsUsers"
/// transformation, the user/item swap for user prediction, test users (optionally sampled),
/// and candidate items together with the candidate item mode.
/// </remarks>
static void LoadData()
{
	TimeSpan loading_time = Wrap.MeasureTime(delegate() {
		// training data
		training_file = Path.Combine(data_dir, training_file);
		training_data = double.IsNaN(rating_threshold)
			? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
			: ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

		// user attributes
		if (user_attributes_file != null)
			user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
		if (recommender is IUserAttributeAwareRecommender)
			((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

		// item attributes
		if (item_attributes_file != null)
			item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
		if (recommender is IItemAttributeAwareRecommender)
			((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

		// user relation
		if (recommender is IUserRelationAwareRecommender)
		{
			((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
			Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
		}

		// item relation
		if (recommender is IItemRelationAwareRecommender)
		{
			((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
			Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
		}

		// user groups
		if (user_groups_file != null)
		{
			group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
			user_groups = group_to_user.NonEmptyRowIDs;
			Console.WriteLine("{0} user groups", user_groups.Count);
		}

		// test data
		if (test_ratio == 0)
		{
			if (test_file != null)
			{
				test_file = Path.Combine(data_dir, test_file);
				test_data = double.IsNaN(rating_threshold)
					? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
					: ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
			}
		}
		else
		{
			var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
			training_data = split.Train[0];
			test_data = split.Test[0];
		}

		if (group_method == "GroupsAsUsers")
		{
			Console.WriteLine("group recommendation strategy: {0}", group_method);
			// TODO verify what is going on here

			//var training_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
			// transform groups to users
			foreach (int group_id in group_to_user.NonEmptyRowIDs)
				foreach (int user_id in group_to_user[group_id])
					foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
						training_data.Add(group_id, item_id);
			// add the users that do not belong to groups

			//training_data = training_data_group;

			// transform groups to users
			var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
			foreach (int group_id in group_to_user.NonEmptyRowIDs)
				foreach (int user_id in group_to_user[group_id])
					foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
						test_data_group.Add(group_id, item_id);

			test_data = test_data_group;

			group_method = null; // deactivate s.t. the normal eval routines are used
		}

		if (user_prediction)
		{
			// swap file names for test users and candidate items
			var ruf = test_users_file;
			var rif = candidate_items_file;
			test_users_file = rif;
			candidate_items_file = ruf;

			// swap user and item mappings
			var um = user_mapping;
			var im = item_mapping;
			user_mapping = im;
			item_mapping = um;

			// transpose training and test data
			training_data = training_data.Transpose();

			// transpose test data
			if (test_data != null)
				test_data = test_data.Transpose();
		}

		if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
			((ItemRecommender)recommender).Feedback = training_data;

		// test users
		if (test_users_file != null)
			test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
		else
			test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

		// if necessary, perform user sampling
		if (num_test_users > 0 && num_test_users < test_users.Count)
		{
			var old_test_users = new HashSet<int>(test_users);
			var new_test_users = new int[num_test_users];
			for (int i = 0; i < num_test_users; i++)
			{
				// the upper bound of Next() is exclusive, so pass Count (not Count - 1);
				// otherwise the last remaining user could never be drawn
				int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count);
				new_test_users[i] = old_test_users.ElementAt(random_index);
				// sampling without replacement
				old_test_users.Remove(new_test_users[i]);
			}
			test_users = new_test_users;
		}

		// candidate items
		if (candidate_items_file != null)
			candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
		else if (all_items)
			candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

		// an explicit candidate list takes precedence over the mode switches
		if (candidate_items != null)
			eval_item_mode = CandidateItems.EXPLICIT;
		else if (in_training_items)
			eval_item_mode = CandidateItems.TRAINING;
		else if (in_test_items)
			eval_item_mode = CandidateItems.TEST;
		else if (overlap_items)
			eval_item_mode = CandidateItems.OVERLAP;
		else
			eval_item_mode = CandidateItems.UNION;
	});
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
	Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>Turn rating data into positive-only feedback</summary>
/// <param name="ratings">the rating data</param>
/// <param name="threshold">ratings greater than or equal to this value count as positive feedback</param>
/// <returns>the positive-only feedback containing one event per accepted rating</returns>
static IPosOnlyFeedback CreateFeedback(IRatings ratings, double threshold)
{
	var positive_events = new PosOnlyFeedback<SparseBooleanMatrix>();

	for (int i = 0; i < ratings.Count; i++)
	{
		// negated >= (rather than <) so that NaN ratings are rejected exactly as before
		if (!(ratings[i] >= threshold))
			continue;

		positive_events.Add(ratings.Users[i], ratings.Items[i]);
	}

	Console.Error.WriteLine("{0} ratings > {1}", positive_events.Count, threshold);
	return positive_events;
}
/// <summary>Load implicit feedback from an IDataReader (for instance a DbDataReader on top of a database query)</summary>
/// <remarks>Column 0 supplies the user ID and column 1 the item ID; both are fetched through string getters.</remarks>
/// <param name="reader">the IDataReader delivering the rows</param>
/// <param name="user_mapping">the <see cref="IMapping"/> used to translate user IDs</param>
/// <param name="item_mapping">the <see cref="IMapping"/> used to translate item IDs</param>
/// <returns>the user-wise collaborative data as an <see cref="IPosOnlyFeedback"/> object</returns>
public static IPosOnlyFeedback Read(IDataReader reader, IMapping user_mapping, IMapping item_mapping)
{
	var result = new PosOnlyFeedback<SparseBooleanMatrix>();

	bool has_enough_columns = reader.FieldCount >= 2;
	if (!has_enough_columns)
	{
		throw new FormatException("Expected at least 2 columns.");
	}

	// the column accessors are created once, before the first row is read
	Func<string> read_user = reader.GetStringGetter(0);
	Func<string> read_item = reader.GetStringGetter(1);

	while (reader.Read())
	{
		// arguments are evaluated left to right: user first, then item
		result.Add(
			user_mapping.ToInternalID(read_user()),
			item_mapping.ToInternalID(read_item()));
	}

	return result;
}
/// <summary>Load rating data from an IDataReader (for instance a DbDataReader) and interpret it as implicit feedback</summary>
/// <remarks>Columns 0, 1 and 2 supply user ID, item ID and rating. IDs are mapped for every row, even if the rating is rejected.</remarks>
/// <param name="reader">the IDataReader delivering the rows</param>
/// <param name="rating_threshold">the smallest rating value that is still accepted as positive feedback</param>
/// <param name="user_mapping">the <see cref="IEntityMapping"/> used to translate user IDs</param>
/// <param name="item_mapping">the <see cref="IEntityMapping"/> used to translate item IDs</param>
/// <returns>the user-wise collaborative data as an <see cref="IPosOnlyFeedback"/> object</returns>
public static IPosOnlyFeedback Read(IDataReader reader, float rating_threshold, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
	var result = new PosOnlyFeedback<SparseBooleanMatrix>();

	bool has_enough_columns = reader.FieldCount >= 3;
	if (!has_enough_columns)
	{
		throw new FormatException("Expected at least 3 columns.");
	}

	// the column accessors are created once, before the first row is read
	Func<string> read_user   = reader.GetStringGetter(0);
	Func<string> read_item   = reader.GetStringGetter(1);
	Func<float>  read_rating = reader.GetFloatGetter(2);

	while (reader.Read())
	{
		int internal_user = user_mapping.ToInternalID(read_user());
		int internal_item = item_mapping.ToInternalID(read_item());

		if (read_rating() >= rating_threshold)
		{
			result.Add(internal_user, internal_item);
		}
	}

	return result;
}
// TODO consider micro- (by item) and macro-averaging (by user, the current thing)
/// <summary>Online evaluation for rankings of items</summary>
/// <remarks>
/// The test events are processed one by one in random order: an event whose user and item are both
/// relevant is evaluated on its own, and afterwards every event (relevant or not) is added to the
/// recommender as feedback. The measures are first averaged per user and then over all users.
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data (must be connected to the recommender's training data)</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>
/// a dictionary containing the evaluation results (averaged by user), plus "num_users" (users with at
/// least one evaluated event), "num_items" (number of relevant items), and "num_lists" (number of
/// evaluated events)
/// </returns>
static public Dictionary <string, double> EvaluateOnline(
	IItemRecommender recommender,
	IPosOnlyFeedback test, IPosOnlyFeedback train,
	ICollection <int> relevant_users, ICollection <int> relevant_items)
{
	// for better handling, move test data points into arrays
	var users = new int[test.Count];
	var items = new int[test.Count];
	int pos   = 0;
	foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
	{
		foreach (int item_id in test.UserMatrix[user_id])
		{
			users[pos] = user_id;
			items[pos] = item_id;
			pos++;
		}
	}

	// random order of the test data points  // TODO chronological order
	var random_index = new int[test.Count];
	for (int index = 0; index < random_index.Length; index++)
	{
		random_index[index] = index;
	}
	Util.Utils.Shuffle <int>(random_index);

	var results_by_user = new Dictionary <int, Dictionary <string, double> >();
	int num_lists = 0; // number of events that were actually evaluated

	foreach (int index in random_index)
	{
		if (relevant_users.Contains(users[index]) && relevant_items.Contains(items[index]))
		{
			// evaluate user
			var current_test = new PosOnlyFeedback <SparseBooleanMatrix>();
			current_test.Add(users[index], items[index]);
			var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

			if (current_result["num_users"] == 1)
			{
				num_lists++;

				if (results_by_user.ContainsKey(users[index]))
				{
					foreach (string measure in Measures)
					{
						results_by_user[users[index]][measure] += current_result[measure];
					}
					results_by_user[users[index]]["num_items"]++;
				}
				else
				{
					// first result for this user: reuse the dictionary as the user's accumulator
					results_by_user[users[index]] = current_result;
					results_by_user[users[index]]["num_items"] = 1;
					results_by_user[users[index]].Remove("num_users");
				}
			}
		}

		// update recommender
		recommender.AddFeedback(users[index], items[index]);
	}

	var results = new Dictionary <string, double>();
	foreach (string measure in Measures)
	{
		results[measure] = 0;
	}

	// per-user average first ...
	foreach (int u in results_by_user.Keys)
	{
		foreach (string measure in Measures)
		{
			results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];
		}
	}

	// ... then the average over all users
	foreach (string measure in Measures)
	{
		results[measure] /= results_by_user.Count;
	}

	results["num_users"] = results_by_user.Count;
	results["num_items"] = relevant_items.Count;
	results["num_lists"] = num_lists;

	return(results);
}
/// <summary>Build positive-only feedback on top of a static sparse matrix from rating data</summary>
/// <param name="ratings">the rating data</param>
/// <param name="threshold">the minimum rating value (inclusive) that is kept as a positive event</param>
/// <returns>the positive-only feedback; a row is stored for each user ID in 0..MaxUserID</returns>
static IPosOnlyFeedback CreateFeedback(IRatings ratings, double threshold)
{
	var rows = new SparseBooleanMatrixStatic();

	int u = 0;
	while (u <= ratings.MaxUserID)
	{
		var kept = new List<int>();
		foreach (int idx in ratings.ByUser[u])
		{
			bool is_positive = ratings[idx] >= threshold;
			if (is_positive)
			{
				kept.Add(ratings.Items[idx]);
			}
		}

		// users without any accepted rating still get an (empty) row
		rows[u] = kept.ToArray();
		u++;
	}

	var pos_only = new PosOnlyFeedback<SparseBooleanMatrixStatic>(rows);
	Console.Error.WriteLine("{0} ratings > {1}", pos_only.Count, threshold);
	return pos_only;
}
// TODO consider micro- (by item) and macro-averaging (by user, the current thing)
/// <summary>Online evaluation for rankings of items</summary>
/// <remarks>
/// The test events are processed one by one in random order: an event whose user and item are both
/// relevant is evaluated on its own, and afterwards every event (relevant or not) is added to the
/// recommender as feedback. The measures are first averaged per user and then over all users.
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data (must be connected to the recommender's training data)</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>
/// a dictionary containing the evaluation results (averaged by user), plus "num_users" (users with at
/// least one evaluated event), "num_items" (number of relevant items), and "num_lists" (number of
/// evaluated events)
/// </returns>
public static Dictionary<string, double> EvaluateOnline(
	IItemRecommender recommender,
	IPosOnlyFeedback test, IPosOnlyFeedback train,
	ICollection<int> relevant_users, ICollection<int> relevant_items)
{
	// for better handling, move test data points into arrays
	var users = new int[test.Count];
	var items = new int[test.Count];
	int pos = 0;
	foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
		foreach (int item_id in test.UserMatrix[user_id])
		{
			users[pos] = user_id;
			items[pos] = item_id;
			pos++;
		}

	// random order of the test data points  // TODO chronological order
	var random_index = new int[test.Count];
	for (int index = 0; index < random_index.Length; index++)
		random_index[index] = index;
	Util.Utils.Shuffle<int>(random_index);

	var results_by_user = new Dictionary<int, Dictionary<string, double>>();
	int num_lists = 0; // number of events that were actually evaluated

	foreach (int index in random_index)
	{
		if (relevant_users.Contains(users[index]) && relevant_items.Contains(items[index]))
		{
			// evaluate user
			var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
			current_test.Add(users[index], items[index]);
			var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

			if (current_result["num_users"] == 1)
			{
				num_lists++;

				if (results_by_user.ContainsKey(users[index]))
				{
					foreach (string measure in Measures)
						results_by_user[users[index]][measure] += current_result[measure];
					results_by_user[users[index]]["num_items"]++;
				}
				else
				{
					// first result for this user: reuse the dictionary as the user's accumulator
					results_by_user[users[index]] = current_result;
					results_by_user[users[index]]["num_items"] = 1;
					results_by_user[users[index]].Remove("num_users");
				}
			}
		}

		// update recommender
		recommender.AddFeedback(users[index], items[index]);
	}

	var results = new Dictionary<string, double>();
	foreach (string measure in Measures)
		results[measure] = 0;

	// per-user average first ...
	foreach (int u in results_by_user.Keys)
		foreach (string measure in Measures)
			results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

	// ... then the average over all users
	foreach (string measure in Measures)
		results[measure] /= results_by_user.Count;

	results["num_users"] = results_by_user.Count;
	results["num_items"] = relevant_items.Count;
	results["num_lists"] = num_lists;

	return results;
}
// TODO consider micro- (by item) and macro-averaging (by user, the current thing); repeated events
/// <summary>Online evaluation for rankings of items</summary>
/// <remarks>
/// The test events are processed one by one in random order: an event whose user is a test user and
/// whose item is a candidate item is evaluated on its own, and afterwards every event is added to the
/// recommender as feedback. The measures are first averaged per user and then over all users.
/// </remarks>
/// <param name="recommender">the item recommender to be evaluated</param>
/// <param name="test">test cases</param>
/// <param name="training">training data (must be connected to the recommender's training data)</param>
/// <param name="test_users">a list of all test user IDs</param>
/// <param name="candidate_items">a list of all candidate item IDs</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
/// <exception cref="ArgumentException">thrown if the recommender is not an IIncrementalItemRecommender</exception>
public static ItemRecommendationEvaluationResults EvaluateOnline(
	this IRecommender recommender,
	IPosOnlyFeedback test, IPosOnlyFeedback training,
	IList<int> test_users, IList<int> candidate_items,
	CandidateItems candidate_item_mode)
{
	var incremental_recommender = recommender as IIncrementalItemRecommender;
	if (incremental_recommender == null)
		throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

	// prepare candidate items once to avoid recreating them
	switch (candidate_item_mode)
	{
		case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
		case CandidateItems.TEST:     candidate_items = test.AllItems; break;
		case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
		case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
	}
	// from here on the (possibly replaced) list is passed on as an explicit candidate list
	candidate_item_mode = CandidateItems.EXPLICIT;

	// for better handling, move test data points into arrays
	var users = new int[test.Count];
	var items = new int[test.Count];
	int pos = 0;
	foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
		foreach (int item_id in test.UserMatrix[user_id])
		{
			users[pos] = user_id;
			items[pos] = item_id;
			pos++;
		}

	// random order of the test data points  // TODO chronological order
	var random_index = new int[test.Count];
	for (int index = 0; index < random_index.Length; index++)
		random_index[index] = index;
	random_index.Shuffle();

	var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();

	int num_lists = 0;

	foreach (int index in random_index)
	{
		if (test_users.Contains(users[index]) && candidate_items.Contains(items[index]))
		{
			// evaluate user
			var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
			current_test.Add(users[index], items[index]);
			var current_result = Items.Evaluate(recommender, current_test, training, current_test.AllUsers, candidate_items, candidate_item_mode);

			if (current_result["num_users"] == 1)
			{
				// count every evaluated list, including the first one of each user
				num_lists++;

				if (results_by_user.ContainsKey(users[index]))
				{
					foreach (string measure in Items.Measures)
						results_by_user[users[index]][measure] += current_result[measure];
					results_by_user[users[index]]["num_items"]++;
				}
				else
				{
					// first result for this user: reuse the result object as the user's accumulator
					results_by_user[users[index]] = current_result;
					results_by_user[users[index]]["num_items"] = 1;
					results_by_user[users[index]].Remove("num_users");
				}
			}
		}

		// update recommender
		incremental_recommender.AddFeedback(users[index], items[index]);
	}

	var results = new ItemRecommendationEvaluationResults();

	// per-user average first ...
	foreach (int u in results_by_user.Keys)
		foreach (string measure in Items.Measures)
			results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

	// ... then the average over all users
	foreach (string measure in Items.Measures)
		results[measure] /= results_by_user.Count;

	results["num_users"] = results_by_user.Count;
	results["num_items"] = candidate_items.Count;
	results["num_lists"] = num_lists;

	return results;
}
/// <summary>Online evaluation for rankings of items</summary>
/// <remarks>
/// The evaluation protocol works as follows:
/// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
/// The sequence of users is random.
/// Users without any test item among the candidate items are skipped entirely (neither evaluated nor added).
/// </remarks>
/// <param name="recommender">the item recommender to be evaluated; must implement IIncrementalItemRecommender</param>
/// <param name="test">test cases</param>
/// <param name="training">training data (must be connected to the recommender's training data)</param>
/// <param name="test_users">a list of all test user IDs; Shuffle() is called on this list itself, so the caller's order is presumably changed</param>
/// <param name="candidate_items">a list of all candidate item IDs</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <returns>
/// a dictionary containing the evaluation results (averaged by user),
/// plus the entries "num_users" and "num_lists" (both the number of evaluated users)
/// and "num_items" (number of candidate items)
/// </returns>
/// <exception cref="ArgumentException">if recommender is not an IIncrementalItemRecommender</exception>
public static ItemRecommendationEvaluationResults EvaluateOnline(
    this IRecommender recommender,
    IPosOnlyFeedback test, IPosOnlyFeedback training,
    IList<int> test_users, IList<int> candidate_items,
    CandidateItems candidate_item_mode)
{
    var incremental_recommender = recommender as IIncrementalItemRecommender;
    if (incremental_recommender == null)
    {
        throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");
    }

    candidate_items = Items.Candidates(candidate_items, candidate_item_mode, test, training);

    test_users.Shuffle();
    var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();
    foreach (int user_id in test_users)
    {
        // test.ByUser[user_id] contains indices into test.Items, not item IDs,
        // so translate them before comparing against the candidate item IDs
        var user_test_items = new List<int>();
        foreach (int index in test.ByUser[user_id])
        {
            user_test_items.Add(test.Items[index]);
        }

        if (!candidate_items.Intersect(user_test_items).Any())
        {
            continue;
        }

        // prepare data: the same events serve as test set and as the subsequent incremental update
        var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
        var tuples = new List<Tuple<int, int>>();
        foreach (int item_id in user_test_items)
        {
            current_test_data.Add(user_id, item_id);
            tuples.Add(Tuple.Create(user_id, item_id));
        }

        // evaluate user
        var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
        results_by_user[user_id] = current_result;

        // update recommender
        incremental_recommender.AddFeedback(tuples);
        // TODO candidate_items should be updated properly
    }

    // NOTE(review): if no user was evaluated, the second loop divides by zero -- confirm the desired result for that case
    var results = new ItemRecommendationEvaluationResults();
    foreach (ItemRecommendationEvaluationResults per_user in results_by_user.Values)
    {
        foreach (string measure in Items.Measures)
        {
            results[measure] += per_user[measure];
        }
    }
    foreach (string measure in Items.Measures)
    {
        results[measure] /= results_by_user.Count;
    }

    results["num_users"] = results_by_user.Count;
    results["num_items"] = candidate_items.Count;
    results["num_lists"] = results_by_user.Count;

    return results;
}
/// <summary>AllItems must contain every item ID that occurs in the feedback exactly once</summary>
[Test]
public void TestAllItems()
{
    // (user, item) events: 8 events that reference the 6 distinct items 2, 3, 4, 5, 7, 8
    int[,] events =
    {
        { 1, 4 }, { 1, 8 },
        { 2, 4 }, { 2, 2 }, { 2, 5 },
        { 3, 7 }, { 3, 3 },
        { 6, 3 },
    };

    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int row = 0; row < events.GetLength(0); row++)
    {
        feedback.Add(events[row, 0], events[row, 1]);
    }

    Assert.AreEqual(6, feedback.AllItems.Count);
}
/// <summary>RemoveItem must delete all events of the given item and nothing else</summary>
[Test]
public void TestRemoveItem()
{
    // (user, item) events: 7 events, 3 of which refer to item 4
    int[,] events =
    {
        { 1, 4 }, { 1, 8 },
        { 2, 4 }, { 2, 2 }, { 2, 5 },
        { 3, 4 }, { 3, 3 },
    };

    var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int row = 0; row < events.GetLength(0); row++)
    {
        feedback.Add(events[row, 0], events[row, 1]);
    }

    // state before the removal
    Assert.AreEqual(7, feedback.Count);
    Assert.IsTrue(feedback.UserMatrix[2, 4]);

    feedback.RemoveItem(4);

    // state after the removal: the 3 events of item 4 are gone
    Assert.IsFalse(feedback.UserMatrix[2, 4]);
    Assert.AreEqual(4, feedback.Count);
}
/// <summary>Default constructor</summary>
/// <remarks>Sets AdditionalFeedback to an empty feedback object, in case no test data is provided.</remarks>
public DemoMFUserItemAtt()
{
    // in case no test data is provided
    AdditionalFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();
}