public void TestRatioSplit() { var ratings = new TimedRatings(); ratings.Add(0, 0, 5.0f, new DateTime(2011, 10, 31)); ratings.Add(0, 1, 4.5f, new DateTime(2011, 11, 1)); ratings.Add(0, 2, 5.0f, new DateTime(2011, 11, 3)); ratings.Add(0, 3, 4.5f, new DateTime(2011, 11, 4)); ratings.Add(1, 0, 1.0f, new DateTime(2011, 10, 31)); ratings.Add(1, 1, 2.5f, new DateTime(2011, 11, 2)); ratings.Add(1, 2, 1.0f, new DateTime(2011, 12, 1)); ratings.Add(1, 3, 2.5f, new DateTime(2011, 12, 4)); var split1 = new RatingsPerUserChronologicalSplit(ratings, 0.25); Assert.AreEqual(6, split1.Train[0].Count); Assert.AreEqual(2, split1.Test[0].Count); Assert.AreEqual(3, split1.Train[0].ByUser[0].Count); Assert.AreEqual(3, split1.Train[0].ByUser[1].Count); Assert.AreEqual(1, split1.Test[0].ByUser[0].Count); Assert.AreEqual(1, split1.Test[0].ByUser[1].Count); var split2 = new RatingsPerUserChronologicalSplit(ratings, 0.5); Assert.AreEqual(4, split2.Train[0].Count); Assert.AreEqual(4, split2.Test[0].Count); Assert.AreEqual(2, split2.Train[0].ByUser[0].Count); Assert.AreEqual(2, split2.Train[0].ByUser[1].Count); Assert.AreEqual(2, split2.Test[0].ByUser[0].Count); Assert.AreEqual(2, split2.Test[0].ByUser[1].Count); }
/// <summary>Read in rating data from a TextReader</summary> /// <param name="reader">the <see cref="TextReader"/> to read from</param> /// <param name="user_mapping">mapping object for user IDs</param> /// <param name="item_mapping">mapping object for item IDs</param> /// <returns>the rating data</returns> public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null) { if (user_mapping == null) user_mapping = new IdentityMapping(); if (item_mapping == null) item_mapping = new IdentityMapping(); var ratings = new TimedRatings(); string[] separators = { "::" }; string line; while ((line = reader.ReadLine()) != null) { string[] tokens = line.Split(separators, StringSplitOptions.None); if (tokens.Length < 4) throw new FormatException(string.Format("Expected at least 4 columns: {0}", line)); int user_id = user_mapping.ToInternalID(tokens[0]); int item_id = item_mapping.ToInternalID(tokens[1]); float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture); long seconds = uint.Parse(tokens[3]); var time = new DateTime(seconds * 10000000L).AddYears(1969); var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time); time -= offset; ratings.Add(user_id, item_id, rating, time); } return ratings; }
public void TestTimeSplit() { var ratings = new TimedRatings(); ratings.Add(0, 0, 5.0f, new DateTime(2011, 10, 31)); ratings.Add(0, 1, 4.5f, new DateTime(2011, 11, 1)); ratings.Add(1, 0, 1.0f, new DateTime(2011, 10, 31)); ratings.Add(1, 1, 2.5f, new DateTime(2011, 11, 2)); var split1 = new RatingsChronologicalSplit(ratings, new DateTime(2011, 11, 2)); Assert.AreEqual(3, split1.Train[0].Count); Assert.AreEqual(1, split1.Test[0].Count); Assert.AreEqual(2, split1.Train[0].ByUser[0].Count); Assert.AreEqual(1, split1.Train[0].ByUser[1].Count); Assert.AreEqual(0, split1.Test[0].ByUser[0].Count); Assert.AreEqual(1, split1.Test[0].ByUser[1].Count); Assert.AreEqual(new DateTime(2011, 10, 31), split1.Train[0].EarliestTime); Assert.AreEqual(new DateTime(2011, 11, 1), split1.Train[0].LatestTime); Assert.AreEqual(new DateTime(2011, 11, 2), split1.Test[0].EarliestTime); Assert.AreEqual(new DateTime(2011, 11, 2), split1.Test[0].LatestTime); var split2 = new RatingsChronologicalSplit(ratings, new DateTime(2011, 11, 1)); Assert.AreEqual(2, split2.Train[0].Count); Assert.AreEqual(2, split2.Test[0].Count); Assert.AreEqual(1, split2.Train[0].ByUser[0].Count); Assert.AreEqual(1, split2.Train[0].ByUser[1].Count); Assert.AreEqual(1, split2.Test[0].ByUser[0].Count); Assert.AreEqual(1, split2.Test[0].ByUser[1].Count); Assert.AreEqual(new DateTime(2011, 10, 31), split2.Train[0].EarliestTime); Assert.AreEqual(new DateTime(2011, 10, 31), split2.Train[0].LatestTime); Assert.AreEqual(new DateTime(2011, 11, 1), split2.Test[0].EarliestTime); Assert.AreEqual(new DateTime(2011, 11, 2), split2.Test[0].LatestTime); }
public static IRatings CreateRandomTimedRatings(int num_users, int num_items, int num_ratings) { var random = MyMediaLite.Random.GetInstance(); var ratings = new TimedRatings(); for (int i = 0; i < num_ratings; i++) { int user_id = random.Next(num_users); int item_id = random.Next(num_items); int rating_value = 1 + random.Next(5); ratings.Add(user_id, item_id, rating_value, DateTime.Now); } return ratings; }
/// <summary>Read in rating data from a TextReader</summary> /// <param name="reader">the <see cref="TextReader"/> to read from</param> /// <param name="user_mapping">mapping object for user IDs</param> /// <param name="item_mapping">mapping object for item IDs</param> /// <param name="test_rating_format">whether there is a rating column in each line or not</param> /// <returns>the rating data</returns> static public ITimedRatings Read( TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS) { if (user_mapping == null) user_mapping = new IdentityMapping(); if (item_mapping == null) item_mapping = new IdentityMapping(); var ratings = new TimedRatings(); string[] separators = { "::" }; string line; int seconds_pos = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? 3 : 2; while ((line = reader.ReadLine()) != null) { string[] tokens = line.Split(separators, StringSplitOptions.None); if (test_rating_format == TestRatingFileFormat.WITH_RATINGS && tokens.Length < 4) throw new FormatException("Expected at least 4 columns: " + line); if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 3) throw new FormatException("Expected at least 3 columns: " + line); int user_id = user_mapping.ToInternalID(tokens[0]); int item_id = item_mapping.ToInternalID(tokens[1]); float rating = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? float.Parse(tokens[2], CultureInfo.InvariantCulture) : 0; long seconds = uint.Parse(tokens[seconds_pos]); var time = new DateTime(seconds * 10000000L).AddYears(1969); var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time); time -= offset; ratings.Add(user_id, item_id, rating, time); } return ratings; }
/// <summary>Read in rating data from a TextReader</summary> /// <param name="reader">the <see cref="TextReader"/> to read from</param> /// <param name="user_mapping">mapping object for user IDs</param> /// <param name="item_mapping">mapping object for item IDs</param> /// <param name="ignore_first_line">if true, ignore the first line</param> /// <returns>the rating data</returns> static public ITimedRatings Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false) { if (user_mapping == null) { user_mapping = new IdentityMapping(); } if (item_mapping == null) { item_mapping = new IdentityMapping(); } if (ignore_first_line) { reader.ReadLine(); } var ratings = new MyMediaLite.Data.TimedRatings(); var time_split_chars = new char[] { ' ', '-', ':' }; string line; while ((line = reader.ReadLine()) != null) { if (line.Length == 0) { continue; } string[] tokens = line.Split(Constants.SPLIT_CHARS); if (tokens.Length < 4) { throw new FormatException("Expected at least 4 columns: " + line); } int user_id = user_mapping.ToInternalID(tokens[0]); int item_id = item_mapping.ToInternalID(tokens[1]); float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture); string date_string = tokens[3]; if (tokens[3].StartsWith("\"") && tokens.Length > 4 && tokens[4].EndsWith("\"")) { date_string = tokens[3] + " " + tokens[4]; date_string = date_string.Substring(1, date_string.Length - 2); } uint seconds; if (date_string.Length == 19) // format "yyyy-mm-dd hh:mm:ss" { var date_time_tokens = date_string.Split(time_split_chars); ratings.Add( user_id, item_id, rating, new DateTime( int.Parse(date_time_tokens[0]), int.Parse(date_time_tokens[1]), int.Parse(date_time_tokens[2]), int.Parse(date_time_tokens[3]), int.Parse(date_time_tokens[4]), int.Parse(date_time_tokens[5]))); } else if (date_string.Length == 10 && date_string[4] == '-') // format "yyyy-mm-dd" { var date_time_tokens = date_string.Split(time_split_chars); ratings.Add( user_id, item_id, rating, new DateTime( int.Parse(date_time_tokens[0]), int.Parse(date_time_tokens[1]), int.Parse(date_time_tokens[2]))); } else if (uint.TryParse(date_string, out seconds)) // unsigned integer value, interpreted as seconds since Unix epoch { var time = new DateTime(seconds * 10000000L).AddYears(1969); var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time); ratings.Add(user_id, item_id, rating, time - offset); } else { ratings.Add(user_id, item_id, rating, DateTime.Parse(date_string, CultureInfo.InvariantCulture)); } if (ratings.Count % 200000 == 199999) { Console.Error.Write("."); } if (ratings.Count % 12000000 == 11999999) { Console.Error.WriteLine(); } } ratings.InitScale(); return(ratings); }
/// <summary>Read in rating data from a TextReader</summary> /// <param name="reader">the <see cref="TextReader"/> to read from</param> /// <param name="user_mapping">mapping object for user IDs</param> /// <param name="item_mapping">mapping object for item IDs</param> /// <param name="ignore_first_line">if true, ignore the first line</param> /// <returns>the rating data</returns> public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null, bool ignore_first_line = false) { if (user_mapping == null) user_mapping = new IdentityMapping(); if (item_mapping == null) item_mapping = new IdentityMapping(); if (ignore_first_line) reader.ReadLine(); var ratings = new MyMediaLite.Data.TimedRatings(); var time_split_chars = new char[] { ' ', '-', ':' }; string line; while ((line = reader.ReadLine()) != null) { if (line.Length == 0) continue; string[] tokens = line.Split(Constants.SPLIT_CHARS); if (tokens.Length < 4) throw new FormatException("Expected at least 4 columns: " + line); int user_id = user_mapping.ToInternalID(tokens[0]); int item_id = item_mapping.ToInternalID(tokens[1]); float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture); string date_string = tokens[3]; if (tokens[3].StartsWith("\"") && tokens.Length > 4 && tokens[4].EndsWith("\"")) { date_string = tokens[3] + " " + tokens[4]; date_string = date_string.Substring(1, date_string.Length - 2); } uint seconds; if (date_string.Length == 19) // format "yyyy-mm-dd hh:mm:ss" { var date_time_tokens = date_string.Split(time_split_chars); ratings.Add( user_id, item_id, rating, new DateTime( int.Parse(date_time_tokens[0]), int.Parse(date_time_tokens[1]), int.Parse(date_time_tokens[2]), int.Parse(date_time_tokens[3]), int.Parse(date_time_tokens[4]), int.Parse(date_time_tokens[5]))); } else if (date_string.Length == 10 && date_string[4] == '-') // format "yyyy-mm-dd" { var date_time_tokens = date_string.Split(time_split_chars); ratings.Add( user_id, item_id, rating, new DateTime( int.Parse(date_time_tokens[0]), int.Parse(date_time_tokens[1]), int.Parse(date_time_tokens[2]))); } else if (uint.TryParse(date_string, out seconds)) // unsigned integer value, interpreted as seconds since Unix epoch { var time = new DateTime(seconds * 10000000L).AddYears(1969); var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time); ratings.Add(user_id, item_id, rating, time - offset); } else ratings.Add(user_id, item_id, rating, DateTime.Parse(date_string, CultureInfo.InvariantCulture)); if (ratings.Count % 200000 == 199999) Console.Error.Write("."); if (ratings.Count % 12000000 == 11999999) Console.Error.WriteLine(); } ratings.InitScale(); return ratings; }