/// <summary>
/// Trains a MyMediaLite WRMF item recommender on the training part of the data
/// and runs the time-based evaluation on the validation and test parts.
/// </summary>
/// <param name="all_data">the complete timed rating data; it is passed by reference to removeUserThreshold before being split</param>
private static void startWRMF(ITimedRatings all_data)
{
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration Test WRMF");

    // The original comments label these parts 10% / 20% / 70% -- TODO confirm against readAndSplitData.
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    // The recommender is fed positive-only feedback, so each training (user, item) pair
    // is copied over without its rating value.
    IPosOnlyFeedback training_data_pos = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int index = 0; index < training_data.Users.Count; index++)
    {
        training_data_pos.Add(training_data.Users[index], training_data.Items[index]);
    }

    MyMediaLite.ItemRecommendation.WRMF recommender = new MyMediaLite.ItemRecommendation.WRMF();
    recommender.Feedback = training_data_pos;

    // Stopwatch instead of DateTime.Now differences: wall-clock time can jump
    // (clock adjustments, DST), which would distort the reported duration.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();
    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);

    Console.WriteLine("Final results in this iteration:");
    // The returned result objects were never read by this method, so they are not stored.
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
/// <summary>
/// Checks RatingsChronologicalSplit for two split times: the expected counts put ratings
/// dated before the split time into the training set and all others into the test set.
/// </summary>
public void TestTimeSplit()
{
    var oct31 = new DateTime(2011, 10, 31);
    var nov01 = new DateTime(2011, 11, 1);
    var nov02 = new DateTime(2011, 11, 2);

    var timed_ratings = new TimedRatings();
    timed_ratings.Add(0, 0, 5.0f, oct31);
    timed_ratings.Add(0, 1, 4.5f, nov01);
    timed_ratings.Add(1, 0, 1.0f, oct31);
    timed_ratings.Add(1, 1, 2.5f, nov02);

    // Split at 2 November: only the rating given on that day ends up in the test set.
    var late_split = new RatingsChronologicalSplit(timed_ratings, nov02);
    var late_train = late_split.Train[0];
    var late_test = late_split.Test[0];
    Assert.AreEqual(3, late_train.Count);
    Assert.AreEqual(1, late_test.Count);
    Assert.AreEqual(2, late_train.ByUser[0].Count);
    Assert.AreEqual(1, late_train.ByUser[1].Count);
    Assert.AreEqual(0, late_test.ByUser[0].Count);
    Assert.AreEqual(1, late_test.ByUser[1].Count);
    Assert.AreEqual(oct31, late_train.EarliestTime);
    Assert.AreEqual(nov01, late_train.LatestTime);
    Assert.AreEqual(nov02, late_test.EarliestTime);
    Assert.AreEqual(nov02, late_test.LatestTime);

    // Split at 1 November: both ratings from 31 October stay in the training set.
    var early_split = new RatingsChronologicalSplit(timed_ratings, nov01);
    var early_train = early_split.Train[0];
    var early_test = early_split.Test[0];
    Assert.AreEqual(2, early_train.Count);
    Assert.AreEqual(2, early_test.Count);
    Assert.AreEqual(1, early_train.ByUser[0].Count);
    Assert.AreEqual(1, early_train.ByUser[1].Count);
    Assert.AreEqual(1, early_test.ByUser[0].Count);
    Assert.AreEqual(1, early_test.ByUser[1].Count);
    Assert.AreEqual(oct31, early_train.EarliestTime);
    Assert.AreEqual(oct31, early_train.LatestTime);
    Assert.AreEqual(nov01, early_test.EarliestTime);
    Assert.AreEqual(nov02, early_test.LatestTime);
}
/// <summary>
/// Checks RatingsPerUserChronologicalSplit on two users with four ratings each,
/// for a second constructor argument of 1 to 5.
/// </summary>
public void TestNumberSplit()
{
    var timed_ratings = new TimedRatings();

    // user 0
    timed_ratings.Add(0, 0, 5.0f, new DateTime(2011, 10, 31));
    timed_ratings.Add(0, 1, 4.5f, new DateTime(2011, 11, 1));
    timed_ratings.Add(0, 2, 5.0f, new DateTime(2011, 11, 3));
    timed_ratings.Add(0, 3, 4.5f, new DateTime(2011, 11, 4));

    // user 1
    timed_ratings.Add(1, 0, 1.0f, new DateTime(2011, 10, 31));
    timed_ratings.Add(1, 1, 2.5f, new DateTime(2011, 11, 2));
    timed_ratings.Add(1, 2, 1.0f, new DateTime(2011, 12, 1));
    timed_ratings.Add(1, 3, 2.5f, new DateTime(2011, 12, 4));

    // Argument 1: one rating per user is expected in the test set.
    var one_per_user = new RatingsPerUserChronologicalSplit(timed_ratings, 1);
    var train_one = one_per_user.Train[0];
    var test_one = one_per_user.Test[0];
    Assert.AreEqual(6, train_one.Count);
    Assert.AreEqual(2, test_one.Count);
    Assert.AreEqual(3, train_one.ByUser[0].Count);
    Assert.AreEqual(3, train_one.ByUser[1].Count);
    Assert.AreEqual(1, test_one.ByUser[0].Count);
    Assert.AreEqual(1, test_one.ByUser[1].Count);

    // Argument 2: two ratings per user are expected in the test set.
    var two_per_user = new RatingsPerUserChronologicalSplit(timed_ratings, 2);
    var train_two = two_per_user.Train[0];
    var test_two = two_per_user.Test[0];
    Assert.AreEqual(4, train_two.Count);
    Assert.AreEqual(4, test_two.Count);
    Assert.AreEqual(2, train_two.ByUser[0].Count);
    Assert.AreEqual(2, train_two.ByUser[1].Count);
    Assert.AreEqual(2, test_two.ByUser[0].Count);
    Assert.AreEqual(2, test_two.ByUser[1].Count);

    // Argument 3: three ratings per user are expected in the test set.
    var three_per_user = new RatingsPerUserChronologicalSplit(timed_ratings, 3);
    var train_three = three_per_user.Train[0];
    var test_three = three_per_user.Test[0];
    Assert.AreEqual(2, train_three.Count);
    Assert.AreEqual(6, test_three.Count);
    Assert.AreEqual(1, train_three.ByUser[0].Count);
    Assert.AreEqual(1, train_three.ByUser[1].Count);
    Assert.AreEqual(3, test_three.ByUser[0].Count);
    Assert.AreEqual(3, test_three.ByUser[1].Count);

    // Argument 4: every rating is expected in the test set.
    var four_per_user = new RatingsPerUserChronologicalSplit(timed_ratings, 4);
    var train_four = four_per_user.Train[0];
    var test_four = four_per_user.Test[0];
    Assert.AreEqual(0, train_four.Count);
    Assert.AreEqual(8, test_four.Count);
    Assert.AreEqual(0, train_four.ByUser[0].Count);
    Assert.AreEqual(0, train_four.ByUser[1].Count);
    Assert.AreEqual(4, test_four.ByUser[0].Count);
    Assert.AreEqual(4, test_four.ByUser[1].Count);

    // Argument 5 exceeds the four ratings each user has; the expected result equals the one for 4.
    var five_per_user = new RatingsPerUserChronologicalSplit(timed_ratings, 5);
    var train_five = five_per_user.Train[0];
    var test_five = five_per_user.Test[0];
    Assert.AreEqual(0, train_five.Count);
    Assert.AreEqual(8, test_five.Count);
    Assert.AreEqual(0, train_five.ByUser[0].Count);
    Assert.AreEqual(0, train_five.ByUser[1].Count);
    Assert.AreEqual(4, test_five.ByUser[0].Count);
    Assert.AreEqual(4, test_five.ByUser[1].Count);
}
/// <summary>
/// Splits the data, initialises the factor matrices and trains a
/// WeatherContextAwareItemRecommender, printing the training time.
/// </summary>
/// <param name="all_data">the complete timed rating data; it is passed by reference to removeUserThreshold before being split</param>
/// <param name="weather_aware">value assigned to the recommender's weather_aware field</param>
/// <param name="beta">value assigned to the recommender's beta field</param>
/// <param name="mu">not used by this method; kept so existing callers keep compiling</param>
/// <param name="city">passed through to the recommender constructor</param>
/// <param name="iterations">value assigned to the recommender's max_iter field</param>
/// <param name="feature">passed through to the recommender constructor</param>
private static void startIterationTestNew(ITimedRatings all_data, bool weather_aware, double beta, double mu, int city, int iterations, string feature = "")
{
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration test");

    // NOTE(review): the original comments labelled these parts 10% / 30% / 70%, which adds up
    // to more than 100% -- confirm the real proportions against readAndSplitData.
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    int K = 100;         // second dimension of every matrix allocated below
    int rangeSize = 10;  // first dimension of F; also assigned to recommender.rangeSize
    int user_count = all_data.AllUsers.Count;
    int location_count = all_data.AllItems.Count();

    double[,] U1 = new double[user_count, K];
    double[,] U2 = new double[user_count, K];
    double[,] L1 = new double[location_count, K];
    double[,] L2 = new double[location_count, K];
    double[,] L3 = new double[location_count, K];
    double[,] F = new double[rangeSize, K];

    Dictionary<int, int> idMapperLocations = new Dictionary<int, int>();
    Dictionary<int, int> idMapperUser = new Dictionary<int, int>();

    initMatrixNormal(all_data.AllUsers, ref U1, ref idMapperUser, K);
    initMatrixNormal(all_data.AllUsers, ref U2, ref idMapperUser, K);
    initMatrixNormal(all_data.AllItems, ref L1, ref idMapperLocations, K);
    initMatrixNormal(all_data.AllItems, ref L2, ref idMapperLocations, K);
    initMatrixNormal(all_data.AllItems, ref L3, ref idMapperLocations, K);
    initMatrixNormal(rangeSize, ref F, K);

    WeatherContextAwareItemRecommender recommender = new WeatherContextAwareItemRecommender(U1, U2, L1, L2, L3, F, idMapperLocations, idMapperUser, city, feature);
    recommender.connection_string = connection;
    recommender.Ratings = training_data;
    recommender.Validation = validation_data;
    recommender.Test = test_data;
    recommender.weather_aware = weather_aware;
    recommender.rangeSize = rangeSize;
    recommender.max_iter = iterations;
    recommender.evaluation_at = 20;
    recommender.beta = beta;

    // Stopwatch instead of DateTime.Now differences: wall-clock time can jump
    // (clock adjustments, DST), which would distort the reported duration.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();
    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);
    Console.WriteLine("Final results in this iteration:");
}
/// <summary>Creates a timed rating dataset filled with randomly drawn ratings.</summary>
/// <param name="num_users">exclusive upper bound passed to the generator when drawing user IDs</param>
/// <param name="num_items">exclusive upper bound passed to the generator when drawing item IDs</param>
/// <param name="num_ratings">number of ratings to add</param>
/// <returns>the generated ratings; every entry is stamped with the current time at the moment it is added</returns>
public static IRatings CreateRandomTimedRatings(int num_users, int num_items, int num_ratings)
{
    var generator = MyMediaLite.Random.GetInstance();
    var generated = new TimedRatings();

    int remaining = num_ratings;
    while (remaining > 0)
    {
        // Draw order (user, item, value) is kept so a seeded generator yields the same data.
        int user = generator.Next(num_users);
        int item = generator.Next(num_items);
        int stars = generator.Next(5) + 1; // 1 plus a draw from Next(5)
        generated.Add(user, item, stars, DateTime.Now);
        remaining--;
    }

    return generated;
}
/// <summary>
/// Splits the data, initialises the factor matrices, trains a WeatherItemRecommender and
/// runs the time-based evaluation on the validation and test parts.
/// </summary>
/// <param name="all_data">the complete timed rating data; it is passed by reference to removeUserThreshold before being split</param>
/// <param name="weather_aware">value assigned to the recommender's weather_aware field</param>
/// <param name="beta">value assigned to the recommender's beta field</param>
/// <param name="mu">not used by this method; kept so existing callers keep compiling</param>
/// <param name="city">not used by this method; kept so existing callers keep compiling</param>
/// <param name="iterations">value assigned to the recommender's max_iter field</param>
private static void startIterationTest(ITimedRatings all_data, bool weather_aware, double beta, double mu, int city, int iterations)
{
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration test");

    // NOTE(review): the original comments labelled these parts 10% / 30% / 70%, which adds up
    // to more than 100% -- confirm the real proportions against readAndSplitData.
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    int K = 100;  // second dimension of every matrix allocated below
    int user_count = all_data.AllUsers.Count;
    int location_count = all_data.AllItems.Count();

    double[,] U1 = new double[user_count, K];
    double[,] U2 = new double[user_count, K];
    double[,] U3 = new double[user_count, K];
    double[,] L1 = new double[location_count, K];

    Dictionary<int, int> idMapperLocations = new Dictionary<int, int>();
    Dictionary<int, int> idMapperUser = new Dictionary<int, int>();

    initMatrixNormal(all_data.AllUsers, ref U1, ref idMapperUser, K);
    initMatrixNormal(all_data.AllUsers, ref U2, ref idMapperUser, K);
    initMatrixNormal(all_data.AllUsers, ref U3, ref idMapperUser, K);
    initMatrixNormal(all_data.AllItems, ref L1, ref idMapperLocations, K);

    WeatherItemRecommender recommender = new WeatherItemRecommender(U1, U2, U3, L1, idMapperLocations, idMapperUser);
    recommender.connection_string = connection;
    recommender.Ratings = training_data;
    recommender.Validation = validation_data;
    recommender.Test = test_data;
    recommender.weather_aware = weather_aware;
    recommender.max_iter = iterations;
    recommender.evaluation_at = 20;
    recommender.beta = beta;

    // Stopwatch instead of DateTime.Now differences: wall-clock time can jump
    // (clock adjustments, DST), which would distort the reported duration.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();
    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);

    Console.WriteLine("Final results in this iteration:");
    // The returned result objects were never read by this method, so they are not stored.
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
/// <summary>Read in rating data from a TextReader</summary>
/// <remarks>
/// Each line is split on "::". The columns are user ID, item ID, an optional rating
/// (present when <paramref name="test_rating_format"/> is WITH_RATINGS) and a timestamp
/// given as whole seconds since 1970-01-01. The local UTC offset at the resulting time is
/// subtracted before the rating is stored.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs</param>
/// <param name="item_mapping">mapping object for item IDs</param>
/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">thrown if a line has fewer columns than the format requires</exception>
public static ITimedRatings Read(
    TextReader reader,
    IMapping user_mapping = null, IMapping item_mapping = null,
    TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS)
{
    if (user_mapping == null)
    {
        user_mapping = new IdentityMapping();
    }
    if (item_mapping == null)
    {
        item_mapping = new IdentityMapping();
    }

    var ratings = new TimedRatings();

    string[] separators = { "::" };
    string line;
    // The timestamp follows the rating column when there is one.
    int seconds_pos = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? 3 : 2;

    // Kind is Unspecified, exactly like the DateTime the previous tick-based computation produced.
    var unix_epoch = new DateTime(1970, 1, 1);

    while ((line = reader.ReadLine()) != null)
    {
        string[] tokens = line.Split(separators, StringSplitOptions.None);

        if (test_rating_format == TestRatingFileFormat.WITH_RATINGS && tokens.Length < 4)
        {
            throw new FormatException("Expected at least 4 columns: " + line);
        }
        if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 3)
        {
            throw new FormatException("Expected at least 3 columns: " + line);
        }

        int user_id = user_mapping.ToInternalID(tokens[0]);
        int item_id = item_mapping.ToInternalID(tokens[1]);
        float rating = test_rating_format == TestRatingFileFormat.WITH_RATINGS
            ? float.Parse(tokens[2], CultureInfo.InvariantCulture)
            : 0;

        // Machine-written data: parse independently of the current culture, like the rating above.
        long seconds = uint.Parse(tokens[seconds_pos], CultureInfo.InvariantCulture);

        // Offset from 1970-01-01 directly. Building a DateTime in year 1 and calling AddYears(1969)
        // lines up the leap years of years 4, 8, ... with 1973, 1977, ... instead of 1972, 1976, ...,
        // which shifts some dates by one day (e.g. 1972-03-01 came out as 1972-03-02).
        var time = unix_epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

        // NOTE(review): the offset is subtracted, not added, as in the original code --
        // confirm that this is the intended time zone handling.
        var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
        time -= offset;

        ratings.Add(user_id, item_id, rating, time);
    }
    return ratings;
}
/// <summary>
/// Converts the training part of the split into the MyMediaLite data structure selected by
/// DataType, hands it to the wrapped recommender and trains it, recording the pure training time.
/// </summary>
/// <param name="split">the split whose Train entries are used as training data</param>
public override void Train(Split split)
{
    // Convert the training set to the MyMediaLite training set format
    if (DataType == IO.DataType.Ratings)
    {
        var mmlFeedback = new Ratings();

        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;
            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value);
        }

        ((IRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else if (DataType == IO.DataType.TimeAwareRating)
    {
        var mmlFeedback = new TimedRatings();
        // The "timestamp" attribute is read as a number of days added to this date.
        var firstRatingMl10M = new DateTime(1998, 11, 1);

        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;
            // Parse with the invariant culture: under a culture that uses ',' as the decimal
            // separator, the default parse would misread or reject values such as "1.5".
            double days = double.Parse(
                feedback.Attributes["timestamp"].Value,
                System.Globalization.CultureInfo.InvariantCulture);
            var time = firstRatingMl10M.AddDays(days);
            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value, time);
        }

        ((ITimeAwareRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else
    {
        // Any other data type is treated as positive-only feedback.
        var mmlFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();

        foreach (var feedback in split.Train)
        {
            mmlFeedback.Add(UsersMap.ToInternalID(feedback.User.Id), ItemsMap.ToInternalID(feedback.Item.Id));
        }

        ((ItemRecommender)MmlRecommenderInstance).Feedback = mmlFeedback;

        // Recommenders that implement IModelAwareRecommender get a reference back to this model.
        var modelAware = MmlRecommenderInstance as IModelAwareRecommender;
        if (modelAware != null)
        {
            modelAware.Model = this;
        }
    }

    Logger.Current.Trace("Training with MyMediaLite recommender...");
    PureTrainTime = (int)Wrap.MeasureTime(delegate() { MmlRecommenderInstance.Train(); }).TotalMilliseconds;
}
/// <summary>
/// Reads the data with explicit user/item ID mappings, trains a MyMediaLite UserKNN item
/// recommender on the training part and runs the time-based evaluation on the validation
/// and test parts.
/// </summary>
/// <param name="data">argument passed to readDataMapped to load the rating data</param>
private static void startUserKNN(string data)
{
    MyMediaLite.Data.Mapping user_mapping = new MyMediaLite.Data.Mapping();
    MyMediaLite.Data.Mapping item_mapping = new MyMediaLite.Data.Mapping();
    ITimedRatings all_data = readDataMapped(data, ref user_mapping, ref item_mapping);
    removeUserThreshold(ref all_data);
    Console.WriteLine("Start iteration Test UserKNN");

    // The original comments label these parts 10% / 20% / 70% -- TODO confirm against readAndSplitData.
    ITimedRatings validation_data = new TimedRatings();
    ITimedRatings test_data = new TimedRatings();
    ITimedRatings training_data = new TimedRatings();
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    // The recommender is fed positive-only feedback, so each training (user, item) pair
    // is copied over without its rating value.
    IPosOnlyFeedback training_data_pos = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int index = 0; index < training_data.Users.Count; index++)
    {
        training_data_pos.Add(training_data.Users[index], training_data.Items[index]);
    }

    MyMediaLite.ItemRecommendation.UserKNN recommender = new MyMediaLite.ItemRecommendation.UserKNN();
    recommender.K = 80;
    recommender.Q = 1;
    recommender.Weighted = false;
    recommender.Alpha = 0.5f;
    recommender.Correlation = MyMediaLite.Correlation.BinaryCorrelationType.Jaccard;
    recommender.Feedback = training_data_pos;

    // Stopwatch instead of DateTime.Now differences: wall-clock time can jump
    // (clock adjustments, DST), which would distort the reported duration.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    recommender.Train();
    stopwatch.Stop();
    Console.Write("Total Training time needed:");
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);

    Console.WriteLine("Final results in this iteration:");
    // The returned result objects were never read by this method, so they are not stored.
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
// Disabled legacy code, kept for reference only. It uses a WeatherItemRecommender constructor
// with more arguments than the one called by startIterationTest in this file.
//
// public static double startMuTuning(ITimedRatings all_data){
//
//     ITimedRatings validation_data = new TimedRatings (); // 10%
//     ITimedRatings test_data = new TimedRatings (); // 20%
//     ITimedRatings training_data = new TimedRatings (); // 70%
//     ITimedRatings sub_all_data = new TimedRatings();
//
//     getSubset (all_data, ref sub_all_data);
//     removeUserThreshold (ref sub_all_data);
//     readAndSplitData (sub_all_data, ref validation_data, ref test_data, ref training_data);
//     Dictionary<int,int> venueCategoryMapper = getCategories(sub_all_data);
//
//     int K = 100;
//     int user_count = sub_all_data.AllUsers.Count;
//     int location_count = sub_all_data.AllItems.Count ();
//     Console.WriteLine (location_count);
//     int category_count = venueCategoryMapper.Values.ToList ().Distinct ().Count();
//
//     double[,] U1 = new double[user_count,K];
//     double[,] U2 = new double[user_count,K];
//     double[,] U3 = new double[user_count,K];
//     double[,] U4 = new double[user_count,K];
//     double[,] L1 = new double[location_count,K];
//     double[,] CA1 = new double[category_count,K];
//     Dictionary<int,int> idMapperCategories = new Dictionary<int,int>();
//     Dictionary<int,int> idMapperLocations =new Dictionary<int,int>();
//     Dictionary<int,int> idMapperUser = new Dictionary<int,int>();
//
//
//     initMatrixNormal (sub_all_data.AllUsers, ref U3, ref idMapperUser, K);
//     initMatrixNormal (sub_all_data.AllUsers, ref U4, ref idMapperUser, K);
//     initMatrixNormal (sub_all_data.AllUsers, ref U1, ref idMapperUser, K);
//     initMatrixNormal (sub_all_data.AllUsers, ref U2, ref idMapperUser, K);
//     initMatrixNormal(sub_all_data.AllItems, ref L1,ref idMapperLocations, K);
//     initMatrixNormal(venueCategoryMapper.Values.ToList ().Distinct ().ToList(), ref CA1,ref idMapperCategories ,K);
//     Console.WriteLine (idMapperCategories.Keys.ToList ().Count);
//     for (int i = 0; i < idMapperCategories.Keys.ToList ().Count; i++) {
//         Console.Write (idMapperCategories.Keys.ToList () [i]);
//         Console.Write (", ");
//         Console.Write (idMapperCategories.Values.ToList () [i]);
//     }
//     List<MyMediaLite.Eval.ItemRecommendationEvaluationResults> result_list = new List<MyMediaLite.Eval.ItemRecommendationEvaluationResults> ();
//
//     double mu = 0.1;
//     double best_mu = 0;
//     double best_mu_value = 0;
//     while (mu <= 1+0.01) { // +0.01 because of double rounding error
//         Console.WriteLine ("Start with mu = " + mu.ToString ());
//         WeatherItemRecommender recommender = new WeatherItemRecommender (U1, U2, U3, U4, CA1, L1,idMapperLocations, idMapperCategories, idMapperUser);
//         recommender.connection_string = connection;
//         recommender.Ratings = training_data;
//         recommender.Validation = validation_data;
//         recommender.weather_category_aware = true;
//         recommender.weather_aware = false;
//         recommender.max_iter = 250;
//         recommender.evaluation_at = 251;
//         recommender.beta = 0f;
//         recommender.mu = mu;
//         DateTime start_time = DateTime.Now;
//         recommender.Train ();
//         Console.Write ("Total Training time needed:");
//         Console.WriteLine (((TimeSpan)(DateTime.Now - start_time)).TotalMilliseconds);
//         Console.WriteLine ("Final results in this iteration:");
//         MyMediaLite.Eval.ItemRecommendationEvaluationResults results = MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime (recommender, test_data, training_data);
//         foreach (var key in results.Keys)
//             Console.WriteLine ("{0}={1}", key, results [key]);
//         if (results ["prec@5"] > best_mu_value) {
//             best_mu = mu;
//             best_mu_value = results ["prec@5"];
//         }
//         Console.WriteLine ("Finished mu = " + mu.ToString ());
//         mu += 0.1;
//
//     }
//     return best_mu;
// }

/// <summary>
/// Loads the rating data and runs the experiment selected by <paramref name="mode"/>.
/// </summary>
/// <param name="data_file">argument passed to readData; also handed to the ItemKNN and UserKNN runs, which load the data themselves</param>
/// <param name="mode">
/// 1 = nothing (beta tuning is disabled), 2 = geo base-line, 3 = geo weather aware,
/// 4 = weather context aware, 5 = most popular, 6 = ItemKNN, 7 = UserKNN, 8 = WRMF, 9 = BPRMF;
/// any other value does nothing after the data has been loaded
/// </param>
/// <param name="city">passed through to the runs of modes 2, 3 and 4</param>
/// <param name="iterations">passed through to the runs of modes 2, 3 and 4</param>
/// <param name="feature">passed through to the run of mode 4 only</param>
public static void totalTest(string data_file, int mode, int city, int iterations, string feature)
{
    ITimedRatings all_data = readData(data_file);

    // Fixed hyper-parameters shared by modes 2, 3 and 4.
    const double beta = 0.2;
    const double mu = 0;

    switch (mode)
    {
        case 1:
            // Beta tuning is currently disabled:
            // Console.WriteLine ("Start beta tuning");
            // double beta = 0.1;
            // for (int i = 0; i < 5; i++) {
            //     Console.WriteLine ("Start beta total iteration " + i.ToString ());
            //     beta = startBetaTuning (all_data);
            //     Console.WriteLine ("Finished beta total iteration " + i.ToString ());
            //     Console.WriteLine ("Best beta = " + beta.ToString ());
            // }
            // Console.WriteLine ("End beta tuning");
            break;

        case 2:
            Console.WriteLine("Start geo base-line algo");
            startIterationTestNew(all_data, false, beta, mu, city, iterations);
            Console.WriteLine("End geo base-line algo");
            break;

        case 3:
            Console.WriteLine("Start geo weather aware algo");
            startIterationTest(all_data, true, beta, mu, city, iterations);
            // Previously printed "End geo base-line algo", copied from mode 2.
            Console.WriteLine("End geo weather aware algo");
            break;

        case 4:
            Console.WriteLine("Start weather context aware algo");
            startIterationTestNew(all_data, true, beta, mu, city, iterations, feature);
            Console.WriteLine("End weather context aware algo");
            break;

        case 5:
            Console.WriteLine("Start most popular algo");
            startMostPopular(all_data);
            Console.WriteLine("End most popular algo");
            break;

        case 6:
            Console.WriteLine("Start ItemKNN algo");
            startItemKNN(data_file);
            Console.WriteLine("End ItemKNN algo");
            break;

        case 7:
            Console.WriteLine("Start UserKNN algo");
            startUserKNN(data_file);
            Console.WriteLine("End UserKNN algo");
            break;

        case 8:
            Console.WriteLine("Start WRMF algo");
            startWRMF(all_data);
            Console.WriteLine("End WRMF algo");
            break;

        case 9:
            Console.WriteLine("Start BPRMF algo");
            startBPRMF(all_data);
            Console.WriteLine("End BPRMF algo");
            break;
    }
}