/// <summary>Read in rating data from a TextReader</summary>
/// <remarks>
/// Every non-blank line is split at <c>Constants.SPLIT_CHARS</c>; the first three fields are used as
/// user ID, item ID, and rating value, any further fields are ignored.
/// Empty lines and lines consisting only of whitespace are skipped.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">a line has fewer than 3 columns</exception>
public static IRatings Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	if (ignore_first_line)
		reader.ReadLine();

	var ratings = new Ratings();

	string line;
	while ((line = reader.ReadLine()) != null)
	{
		// skip whitespace-only lines as well, like the implicit-feedback readers do
		if (line.Trim().Length == 0)
			continue;

		string[] tokens = line.Split(Constants.SPLIT_CHARS);

		if (tokens.Length < 3)
			throw new FormatException("Expected at least 3 columns: " + line);

		int user_id = user_mapping.ToInternalID(tokens[0]);
		int item_id = item_mapping.ToInternalID(tokens[1]);
		float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);

		ratings.Add(user_id, item_id, rating);
	}

	ratings.InitScale();
	return ratings;
}
/// <summary>Read in rating data with timestamps from a TextReader</summary>
/// <remarks>
/// Every line is split at the separator "::"; the first four fields are used as
/// user ID, item ID, rating value, and time stamp (unsigned integer, seconds since 1970-01-01).
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">a line has fewer than 4 columns</exception>
public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	var ratings = new TimedRatings();
	var unix_epoch = new DateTime(1970, 1, 1);

	string[] separators = { "::" };
	string line;

	while ((line = reader.ReadLine()) != null)
	{
		string[] tokens = line.Split(separators, StringSplitOptions.None);

		if (tokens.Length < 4)
			throw new FormatException(string.Format("Expected at least 4 columns: {0}", line));

		int user_id = user_mapping.ToInternalID(tokens[0]);
		int item_id = item_mapping.ToInternalID(tokens[1]);
		float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);
		long seconds = uint.Parse(tokens[3], CultureInfo.InvariantCulture);

		// Add the seconds directly to the epoch. Building a date in year 1 and shifting it
		// by 1969 years puts the leap days into the wrong years (4 -> 1973 instead of 1972),
		// which moves the result by one day for part of every four-year cycle.
		var time = unix_epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

		// NOTE(review): the local UTC offset is subtracted, so the stored value depends on
		// the time zone of the machine reading the file -- confirm that this is intended.
		var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
		time -= offset;

		ratings.Add(user_id, item_id, rating, time);
	}
	return ratings;
}
/// <summary>Read rating data from a TextReader and keep it as positive-only (implicit) feedback</summary>
/// <remarks>
/// Lines that are empty after trimming are skipped. Every other line is split at
/// <c>Constants.SPLIT_CHARS</c> into user ID, item ID, and rating value.
/// Both IDs are passed through the mappings for every line; the user-item pair is only stored
/// if the rating is greater than or equal to the threshold.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="FormatException">a line has fewer than 3 columns</exception>
static public IPosOnlyFeedback Read(TextReader reader, float rating_threshold, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
	user_mapping = user_mapping ?? new IdentityMapping();
	item_mapping = item_mapping ?? new IdentityMapping();

	if (ignore_first_line)
		reader.ReadLine();

	var positive_feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

	for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
	{
		if (current.Trim().Length == 0)
			continue;

		string[] fields = current.Split(Constants.SPLIT_CHARS);
		if (fields.Length < 3)
			throw new FormatException("Expected at least 3 columns: " + current);

		// the IDs are mapped before the rating is checked, so they are registered either way
		int user_id = user_mapping.ToInternalID(fields[0]);
		int item_id = item_mapping.ToInternalID(fields[1]);
		float rating = float.Parse(fields[2], CultureInfo.InvariantCulture);

		if (rating >= rating_threshold)
			positive_feedback.Add(user_id, item_id);
	}

	return positive_feedback;
}
/// <summary>
/// Trains an ExternalRatingPredictor on a prediction file and checks that, for every rating
/// read from that same file, Predict() returns exactly the stored value.
/// </summary>
public void TestCase()
{
	string filename = "../../../../tests/example.test";
	var identity = new IdentityMapping();

	var expected_ratings = RatingData.Read(filename);

	var predictor = new ExternalRatingPredictor();
	predictor.PredictionFile = filename;
	predictor.UserMapping = identity;
	predictor.ItemMapping = identity;
	predictor.Train();

	int index = 0;
	while (index < expected_ratings.Count)
	{
		var expected = expected_ratings[index];
		var predicted = predictor.Predict(expected_ratings.Users[index], expected_ratings.Items[index]);
		Assert.AreEqual(expected, predicted);
		index++;
	}
}
/// <summary>Read in static rating data from a TextReader</summary>
/// <remarks>
/// Every non-blank line is split at <c>Constants.SPLIT_CHARS</c> into user ID, item ID, and
/// (if <paramref name="test_rating_format"/> is WITH_RATINGS) the rating value.
/// Without a rating column, 0 is stored as the rating.
/// Empty lines and lines consisting only of whitespace are skipped.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="size">the number of ratings in the file</param>
/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
/// <param name="rating_type">the data type to be used for storing the ratings</param>
/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">the rating type is unknown, or a line has too few columns</exception>
static public IRatings Read(
	TextReader reader, int size,
	IMapping user_mapping = null, IMapping item_mapping = null,
	RatingType rating_type = RatingType.FLOAT,
	TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS,
	bool ignore_first_line = false)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	if (ignore_first_line)
		reader.ReadLine();

	IRatings ratings;
	if (rating_type == RatingType.BYTE)
		ratings = new StaticByteRatings(size);
	else if (rating_type == RatingType.FLOAT)
		ratings = new StaticRatings(size);
	else
		throw new FormatException(string.Format("Unknown rating type: {0}", rating_type));

	bool has_rating_column = test_rating_format == TestRatingFileFormat.WITH_RATINGS;

	string line;
	while ((line = reader.ReadLine()) != null)
	{
		// skip whitespace-only lines as well, like the implicit-feedback readers do
		if (line.Trim().Length == 0)
			continue;

		string[] tokens = line.Split(Constants.SPLIT_CHARS);

		if (has_rating_column && tokens.Length < 3)
			throw new FormatException("Expected at least 3 columns: " + line);
		if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 2)
			throw new FormatException("Expected at least 2 columns: " + line);

		int user_id = user_mapping.ToInternalID(tokens[0]);
		int item_id = item_mapping.ToInternalID(tokens[1]);
		float rating = has_rating_column ? float.Parse(tokens[2], CultureInfo.InvariantCulture) : 0;

		ratings.Add(user_id, item_id, rating);
	}

	ratings.InitScale();
	return ratings;
}
/// <summary>Read in rating data with timestamps from a TextReader</summary>
/// <remarks>
/// Every line is split at the separator "::". With a rating column the fields are
/// user ID, item ID, rating, time stamp; without one they are user ID, item ID, time stamp,
/// and 0 is stored as the rating. The time stamp is an unsigned integer, seconds since 1970-01-01.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">a line has too few columns for the selected format</exception>
static public ITimedRatings Read(
	TextReader reader,
	IMapping user_mapping = null, IMapping item_mapping = null,
	TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	var ratings = new TimedRatings();
	var unix_epoch = new DateTime(1970, 1, 1);

	string[] separators = { "::" };
	string line;

	bool has_rating_column = test_rating_format == TestRatingFileFormat.WITH_RATINGS;
	int seconds_pos = has_rating_column ? 3 : 2;

	while ((line = reader.ReadLine()) != null)
	{
		string[] tokens = line.Split(separators, StringSplitOptions.None);

		if (has_rating_column && tokens.Length < 4)
			throw new FormatException("Expected at least 4 columns: " + line);
		if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 3)
			throw new FormatException("Expected at least 3 columns: " + line);

		int user_id = user_mapping.ToInternalID(tokens[0]);
		int item_id = item_mapping.ToInternalID(tokens[1]);
		float rating = has_rating_column ? float.Parse(tokens[2], CultureInfo.InvariantCulture) : 0;
		long seconds = uint.Parse(tokens[seconds_pos], CultureInfo.InvariantCulture);

		// Add the seconds directly to the epoch. Building a date in year 1 and shifting it
		// by 1969 years puts the leap days into the wrong years (4 -> 1973 instead of 1972),
		// which moves the result by one day for part of every four-year cycle.
		var time = unix_epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

		// NOTE(review): the local UTC offset is subtracted, so the stored value depends on
		// the time zone of the machine reading the file -- confirm that this is intended.
		var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
		time -= offset;

		ratings.Add(user_id, item_id, rating, time);
	}
	return ratings;
}
/// <summary>Predict the ratings for a given set of instances and write them to a TextWriter</summary>
/// <remarks>
/// If <paramref name="line_format"/> is the literal string "ranking", one line per user is written
/// via the per-user WritePredictions overload: the candidates are the items of that user's test
/// ratings, nothing is ignored, and the number of predictions equals the number of those ratings.
/// Users without test ratings are left out. Otherwise one formatted line per rating is written.
/// </remarks>
/// <param name="recommender">rating predictor</param>
/// <param name="ratings">test cases</param>
/// <param name="writer">the TextWriter to write the predictions to</param>
/// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs; an IdentityMapping is used if null</param>
/// <param name="line_format">a format string specifying the line format; {0} is the user ID, {1} the item ID, {2} the rating</param>
/// <param name="header">if specified, write this string at the start of the output</param>
public static void WritePredictions(
	this IRecommender recommender,
	IRatings ratings,
	TextWriter writer,
	IMapping user_mapping = null, IMapping item_mapping = null,
	string line_format = "{0}\t{1}\t{2}",
	string header = null)
{
	user_mapping = user_mapping ?? new IdentityMapping();
	item_mapping = item_mapping ?? new IdentityMapping();

	if (header != null)
		writer.WriteLine(header);

	if (line_format != "ranking")
	{
		// one line per test rating
		for (int pos = 0; pos < ratings.Count; pos++)
		{
			var rated_user = ratings.Users[pos];
			var rated_item = ratings.Items[pos];

			var original_user = user_mapping.ToOriginalID(rated_user);
			var original_item = item_mapping.ToOriginalID(rated_item);
			var prediction = recommender.Predict(rated_user, rated_item).ToString(CultureInfo.InvariantCulture);

			writer.WriteLine(line_format, original_user, original_item, prediction);
		}
		return;
	}

	// one ranked list per user who has at least one test rating
	foreach (int user_id in ratings.AllUsers)
	{
		var rating_indices = ratings.ByUser[user_id];
		if (rating_indices.Count > 0)
		{
			List<int> candidates = rating_indices.Select(index => ratings.Items[index]).ToList();
			recommender.WritePredictions(
				user_id,
				candidates,
				new int[0],
				rating_indices.Count,
				writer,
				user_mapping, item_mapping);
		}
	}
}
/// <summary>Read in implicit feedback data from a TextReader</summary>
/// <remarks>
/// Lines that are empty after trimming are skipped. Every other line is split at
/// <c>Constants.SPLIT_CHARS</c>; the first two fields are used as user ID and item ID.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="item_mapping">item <see cref="IMapping"/> object; an IdentityMapping is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
/// <exception cref="FormatException">
/// a line has fewer than 2 columns, or mapping/storing its IDs failed
/// (the original error is then available as the inner exception)
/// </exception>
public static IPosOnlyFeedback Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	if (ignore_first_line)
		reader.ReadLine();

	var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

	string line;
	while ((line = reader.ReadLine()) != null)
	{
		if (line.Trim().Length == 0)
			continue;

		string[] tokens = line.Split(Constants.SPLIT_CHARS);

		if (tokens.Length < 2)
			throw new FormatException("Expected at least 2 columns: " + line);

		try
		{
			int user_id = user_mapping.ToInternalID(tokens[0]);
			int item_id = item_mapping.ToInternalID(tokens[1]);
			feedback.Add(user_id, item_id);
		}
		catch (Exception e)
		{
			// keep the root cause attached instead of discarding it
			throw new FormatException(string.Format("Could not read line '{0}'", line), e);
		}
	}

	return feedback;
}
/// <summary>Read in rating data with time information from a TextReader</summary>
/// <remarks>
/// Every non-blank line is split at <c>Constants.SPLIT_CHARS</c> into user ID, item ID, rating,
/// and a date/time field. If the fourth field starts with a double quote and the fifth ends with
/// one, both are joined by a space and the quotes are removed.
/// The date/time field is interpreted, in this order, as
/// "yyyy-mm-dd hh:mm:ss" (19 characters), "yyyy-mm-dd" (10 characters),
/// an unsigned integer (seconds since 1970-01-01), or anything accepted by
/// <see cref="DateTime.Parse(string, IFormatProvider)"/> with the invariant culture.
/// Progress dots are written to standard error while reading.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
/// <param name="ignore_first_line">if true, ignore the first line</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">a line has fewer than 4 columns</exception>
public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null, bool ignore_first_line = false)
{
	if (user_mapping == null)
		user_mapping = new IdentityMapping();
	if (item_mapping == null)
		item_mapping = new IdentityMapping();

	if (ignore_first_line)
		reader.ReadLine();

	var ratings = new MyMediaLite.Data.TimedRatings();
	var time_split_chars = new char[] { ' ', '-', ':' };
	var unix_epoch = new DateTime(1970, 1, 1);
	var invariant = CultureInfo.InvariantCulture;

	string line;
	while ((line = reader.ReadLine()) != null)
	{
		// skip whitespace-only lines as well, like the implicit-feedback readers do
		if (line.Trim().Length == 0)
			continue;

		string[] tokens = line.Split(Constants.SPLIT_CHARS);

		if (tokens.Length < 4)
			throw new FormatException("Expected at least 4 columns: " + line);

		int user_id = user_mapping.ToInternalID(tokens[0]);
		int item_id = item_mapping.ToInternalID(tokens[1]);
		float rating = float.Parse(tokens[2], invariant);

		// a quoted "date time" value was cut in two by the column split: glue it back together
		string date_string = tokens[3];
		if (tokens[3].StartsWith("\"", StringComparison.Ordinal) && tokens.Length > 4 && tokens[4].EndsWith("\"", StringComparison.Ordinal))
		{
			date_string = tokens[3] + " " + tokens[4];
			date_string = date_string.Substring(1, date_string.Length - 2);
		}

		DateTime time;
		uint seconds;
		if (date_string.Length == 19) // format "yyyy-mm-dd hh:mm:ss"
		{
			var date_time_tokens = date_string.Split(time_split_chars);
			time = new DateTime(
				int.Parse(date_time_tokens[0], invariant),
				int.Parse(date_time_tokens[1], invariant),
				int.Parse(date_time_tokens[2], invariant),
				int.Parse(date_time_tokens[3], invariant),
				int.Parse(date_time_tokens[4], invariant),
				int.Parse(date_time_tokens[5], invariant));
		}
		else if (date_string.Length == 10 && date_string[4] == '-') // format "yyyy-mm-dd"
		{
			var date_time_tokens = date_string.Split(time_split_chars);
			time = new DateTime(
				int.Parse(date_time_tokens[0], invariant),
				int.Parse(date_time_tokens[1], invariant),
				int.Parse(date_time_tokens[2], invariant));
		}
		else if (uint.TryParse(date_string, NumberStyles.Integer, invariant, out seconds)) // unsigned integer value, interpreted as seconds since Unix epoch
		{
			// Add the seconds directly to the epoch. Building a date in year 1 and shifting it
			// by 1969 years puts the leap days into the wrong years (4 -> 1973 instead of 1972),
			// which moves the result by one day for part of every four-year cycle.
			var epoch_time = unix_epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

			// NOTE(review): the local UTC offset is subtracted, so the stored value depends on
			// the time zone of the machine reading the file -- confirm that this is intended.
			var offset = TimeZone.CurrentTimeZone.GetUtcOffset(epoch_time);
			time = epoch_time - offset;
		}
		else
			time = DateTime.Parse(date_string, invariant);

		ratings.Add(user_id, item_id, rating, time);

		// progress indicator on stderr
		if (ratings.Count % 200000 == 199999)
			Console.Error.Write(".");
		if (ratings.Count % 12000000 == 11999999)
			Console.Error.WriteLine();
	}

	ratings.InitScale();
	return ratings;
}
/// <summary>Write item predictions (scores) for one user to a TextWriter object</summary>
/// <remarks>
/// The output is a single line of the form <c>user TAB [item:score,item:score,...]</c>,
/// with the items in order of descending score and the scores formatted with the invariant culture.
/// With a limit, an interval heap holding at most <paramref name="num_predictions"/> entries is used
/// to keep only the best-scored items.
/// </remarks>
/// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
/// <param name="user_id">ID of the user to make recommendations for</param>
/// <param name="candidate_items">list of candidate items</param>
/// <param name="ignore_items">list of items for which no predictions should be made</param>
/// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
/// <param name="writer">the <see cref="TextWriter"/> to write to</param>
/// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs; an IdentityMapping is used if null</param>
public static void WritePredictions(
	this IRecommender recommender,
	int user_id,
	System.Collections.Generic.IList<int> candidate_items,
	System.Collections.Generic.ICollection<int> ignore_items,
	int num_predictions,
	TextWriter writer,
	IEntityMapping user_mapping, IEntityMapping item_mapping)
{
	user_mapping = user_mapping ?? new IdentityMapping();
	item_mapping = item_mapping ?? new IdentityMapping();

	System.Collections.Generic.IList<Pair<int, float>> ranking;

	if (num_predictions == -1)
	{
		// no limit: score every admissible candidate, then sort
		var scored = new List<Pair<int, float>>();
		foreach (int candidate in candidate_items)
		{
			if (ignore_items.Contains(candidate))
				continue;

			float candidate_score = recommender.Predict(user_id, candidate);
			if (candidate_score > float.MinValue)
				scored.Add(new Pair<int, float>(candidate, candidate_score));
		}
		ranking = scored.OrderByDescending(entry => entry.Second).ToArray();
	}
	else
	{
		var by_score = new DelegateComparer<Pair<int, float>>( (left, right) => left.Second.CompareTo(right.Second) );
		var best = new IntervalHeap<Pair<int, float>>(num_predictions, by_score);
		float score_to_beat = float.MinValue;

		foreach (int candidate in candidate_items)
		{
			if (ignore_items.Contains(candidate))
				continue;

			float candidate_score = recommender.Predict(user_id, candidate);
			if (candidate_score > score_to_beat)
			{
				best.Add(new Pair<int, float>(candidate, candidate_score));
				if (best.Count > num_predictions)
				{
					// drop the weakest entry; the new weakest one sets the bar for later candidates
					best.DeleteMin();
					score_to_beat = best.FindMin().Second;
				}
			}
		}

		// empty the heap from the top to get the entries in descending order
		var descending = new Pair<int, float>[best.Count];
		for (int pos = 0; pos < descending.Length; pos++)
			descending[pos] = best.DeleteMax();
		ranking = descending;
	}

	writer.Write("{0}\t[", user_mapping.ToOriginalID(user_id));
	for (int rank = 0; rank < ranking.Count; rank++)
	{
		// every entry but the first is preceded by a comma
		string entry_format = rank == 0 ? "{0}:{1}" : ",{0}:{1}";
		int item_id = ranking[rank].First;
		float score = ranking[rank].Second;
		writer.Write(entry_format, item_mapping.ToOriginalID(item_id), score.ToString(CultureInfo.InvariantCulture));
	}
	writer.WriteLine("]");
}
/// <summary>Write item predictions (scores) for a given user to a TextWriter object</summary>
/// <remarks>
/// The items and scores come from the recommender's Recommend() call and are written in the
/// order it returns them, as a single line of the form <c>user TAB [item:score,item:score,...]</c>.
/// Scores are formatted with the invariant culture.
/// </remarks>
/// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
/// <param name="user_id">ID of the user to make recommendations for</param>
/// <param name="candidate_items">list of candidate items</param>
/// <param name="ignore_items">list of items for which no predictions should be made</param>
/// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
/// <param name="writer">the <see cref="TextWriter"/> to write to</param>
/// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs; an IdentityMapping is used if null</param>
/// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs; an IdentityMapping is used if null</param>
public static void WritePredictions(
	this IRecommender recommender,
	int user_id,
	ICollection<int> candidate_items,
	ICollection<int> ignore_items,
	int num_predictions,
	TextWriter writer,
	IMapping user_mapping, IMapping item_mapping)
{
	user_mapping = user_mapping ?? new IdentityMapping();
	item_mapping = item_mapping ?? new IdentityMapping();

	var recommendations = recommender.Recommend(
		user_id, n:num_predictions,
		ignore_items:ignore_items, candidate_items:candidate_items);

	writer.Write("{0}\t[", user_mapping.ToOriginalID(user_id));
	for (int rank = 0; rank < recommendations.Count; rank++)
	{
		// every entry but the first is preceded by a comma
		string entry_format = rank == 0 ? "{0}:{1}" : ",{0}:{1}";
		int item_id = recommendations[rank].Item1;
		float score = recommendations[rank].Item2;
		writer.Write(entry_format, item_mapping.ToOriginalID(item_id), score.ToString(CultureInfo.InvariantCulture));
	}
	writer.WriteLine("]");
}