Identity mapping for entity IDs: Every original ID is mapped to itself
Inheritance: IEntityMapping
Beispiel #1
0
        /// <summary>Parse rating data line by line from a TextReader</summary>
        /// <remarks>
        /// Empty lines are skipped. Every other line is split on <c>Constants.SPLIT_CHARS</c>;
        /// its first three columns are taken as user ID, item ID, and rating value.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="ignore_first_line">if true, the first line is read and discarded</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">a non-empty line has fewer than 3 columns</exception>
        public static IRatings Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
        {
            user_mapping = user_mapping ?? new IdentityMapping();
            item_mapping = item_mapping ?? new IdentityMapping();

            // consume the header line, if the caller says there is one
            if (ignore_first_line)
                reader.ReadLine();

            var result = new Ratings();

            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                if (row.Length == 0)
                    continue;

                string[] columns = row.Split(Constants.SPLIT_CHARS);
                if (columns.Length < 3)
                    throw new FormatException("Expected at least 3 columns: " + row);

                int user = user_mapping.ToInternalID(columns[0]);
                int item = item_mapping.ToInternalID(columns[1]);
                float value = float.Parse(columns[2], CultureInfo.InvariantCulture);
                result.Add(user, item, value);
            }

            result.InitScale();
            return result;
        }
        /// <summary>Read in rating data with timestamps from a TextReader ("::"-separated columns)</summary>
        /// <remarks>
        /// Every line must consist of at least four columns separated by "::":
        /// user ID, item ID, rating, and an unsigned integer timestamp that is
        /// interpreted as seconds since 1970-01-01 00:00:00.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">a line has fewer than 4 columns</exception>
        public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null)
        {
            if (user_mapping == null)
                user_mapping = new IdentityMapping();
            if (item_mapping == null)
                item_mapping = new IdentityMapping();

            var ratings = new TimedRatings();

            string[] separators = { "::" };
            string line;

            // Count from the epoch itself: counting from year 1 and then adding 1969 years
            // misplaces leap days (year 4 is a leap year, 1973 is not) and shifts many dates by one day.
            var unix_epoch = new DateTime(1970, 1, 1);

            while ((line = reader.ReadLine()) != null)
            {
                string[] tokens = line.Split(separators, StringSplitOptions.None);

                if (tokens.Length < 4)
                    throw new FormatException(string.Format("Expected at least 4 columns: {0}", line));

                int user_id = user_mapping.ToInternalID(tokens[0]);
                int item_id = item_mapping.ToInternalID(tokens[1]);
                float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);
                long seconds = uint.Parse(tokens[3], CultureInfo.InvariantCulture);

                var time = unix_epoch.AddSeconds(seconds);
                // NOTE(review): the local UTC offset is subtracted exactly as before this fix;
                // confirm that this is the intended time zone handling.
                var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
                time -= offset;

                ratings.Add(user_id, item_id, rating, time);
            }
            return ratings;
        }
		/// <summary>Read rating data from a TextReader and keep it as positive-only (implicit) feedback</summary>
		/// <remarks>
		/// Lines that are empty after trimming are skipped. A user-item pair is stored
		/// only if its rating is greater than or equal to <paramref name="rating_threshold"/>.
		/// </remarks>
		/// <param name="reader">the TextReader to be read from</param>
		/// <param name="rating_threshold">the minimum rating value needed to be accepted as positive feedback</param>
		/// <param name="user_mapping">user <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="item_mapping">item <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="ignore_first_line">if true, the first line is read and discarded</param>
		/// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
		/// <exception cref="FormatException">a non-blank line has fewer than 3 columns</exception>
		static public IPosOnlyFeedback Read(TextReader reader, float rating_threshold, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
		{
			user_mapping = user_mapping ?? new IdentityMapping();
			item_mapping = item_mapping ?? new IdentityMapping();

			if (ignore_first_line)
				reader.ReadLine();

			var positive_pairs = new PosOnlyFeedback<SparseBooleanMatrix>();

			for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
			{
				if (row.Trim().Length == 0)
					continue;

				string[] columns = row.Split(Constants.SPLIT_CHARS);
				if (columns.Length < 3)
					throw new FormatException("Expected at least 3 columns: " + row);

				int user = user_mapping.ToInternalID(columns[0]);
				int item = item_mapping.ToInternalID(columns[1]);
				float value = float.Parse(columns[2], CultureInfo.InvariantCulture);

				// ratings below the threshold are parsed but not stored
				if (value >= rating_threshold)
					positive_pairs.Add(user, item);
			}

			return positive_pairs;
		}
		/// <summary>
		/// Check that an ExternalRatingPredictor, pointed at the same file the ratings were read from,
		/// predicts exactly the stored value for every rating
		/// </summary>
		public void TestCase()
		{
			string filename = "../../../../tests/example.test";
			var identity = new IdentityMapping();
			var expected = RatingData.Read(filename);

			var predictor = new ExternalRatingPredictor();
			predictor.PredictionFile = filename;
			predictor.UserMapping = identity;
			predictor.ItemMapping = identity;
			predictor.Train();

			int index = 0;
			while (index < expected.Count)
			{
				Assert.AreEqual(expected[index], predictor.Predict(expected.Users[index], expected.Items[index]));
				index++;
			}
		}
		/// <summary>Read rating data of known size from a TextReader into a static rating container</summary>
		/// <remarks>
		/// Empty lines are skipped. Every other line is split on <c>Constants.SPLIT_CHARS</c>.
		/// If the file format has no rating column, 0 is stored as the rating value.
		/// </remarks>
		/// <param name="reader">the <see cref="TextReader"/> to read from</param>
		/// <param name="size">the number of ratings in the file</param>
		/// <param name="user_mapping">mapping object for user IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="item_mapping">mapping object for item IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="rating_type">the data type to be used for storing the ratings</param>
		/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
		/// <param name="ignore_first_line">if true, the first line is read and discarded</param>
		/// <returns>the rating data</returns>
		/// <exception cref="FormatException">the rating type is unknown, or a line has too few columns</exception>
		static public IRatings Read(
			TextReader reader, int size,
			IMapping user_mapping = null, IMapping item_mapping = null,
			RatingType rating_type = RatingType.FLOAT,
			TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS,
			bool ignore_first_line = false)
		{
			user_mapping = user_mapping ?? new IdentityMapping();
			item_mapping = item_mapping ?? new IdentityMapping();

			if (ignore_first_line)
				reader.ReadLine();

			// pick the storage class that matches the requested rating type
			IRatings result;
			switch (rating_type)
			{
				case RatingType.BYTE:
					result = new StaticByteRatings(size);
					break;
				case RatingType.FLOAT:
					result = new StaticRatings(size);
					break;
				default:
					throw new FormatException(string.Format("Unknown rating type: {0}", rating_type));
			}

			bool with_ratings = test_rating_format == TestRatingFileFormat.WITH_RATINGS;
			bool without_ratings = test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS;

			for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
			{
				if (row.Length == 0)
					continue;

				string[] columns = row.Split(Constants.SPLIT_CHARS);

				if (with_ratings && columns.Length < 3)
					throw new FormatException("Expected at least 3 columns: " + row);
				if (without_ratings && columns.Length < 2)
					throw new FormatException("Expected at least 2 columns: " + row);

				int user = user_mapping.ToInternalID(columns[0]);
				int item = item_mapping.ToInternalID(columns[1]);

				float value = 0;
				if (with_ratings)
					value = float.Parse(columns[2], CultureInfo.InvariantCulture);

				result.Add(user, item, value);
			}

			result.InitScale();
			return result;
		}
		/// <summary>Read in rating data with timestamps from a TextReader ("::"-separated columns)</summary>
		/// <remarks>
		/// With ratings, every line needs at least four columns: user ID, item ID, rating, timestamp.
		/// Without ratings, every line needs at least three columns: user ID, item ID, timestamp;
		/// 0 is then stored as the rating value.
		/// The timestamp is an unsigned integer interpreted as seconds since 1970-01-01 00:00:00.
		/// </remarks>
		/// <param name="reader">the <see cref="TextReader"/> to read from</param>
		/// <param name="user_mapping">mapping object for user IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="item_mapping">mapping object for item IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
		/// <returns>the rating data</returns>
		/// <exception cref="FormatException">a line has fewer columns than the format requires</exception>
		static public ITimedRatings Read(
			TextReader reader,
			IMapping user_mapping = null, IMapping item_mapping = null,
			TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS)
		{
			if (user_mapping == null)
				user_mapping = new IdentityMapping();
			if (item_mapping == null)
				item_mapping = new IdentityMapping();

			var ratings = new TimedRatings();

			string[] separators = { "::" };
			string line;
			// the timestamp follows the rating column if there is one, otherwise the item ID
			int seconds_pos = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? 3 : 2;

			// Count from the epoch itself: counting from year 1 and then adding 1969 years
			// misplaces leap days (year 4 is a leap year, 1973 is not) and shifts many dates by one day.
			var unix_epoch = new DateTime(1970, 1, 1);

			while ((line = reader.ReadLine()) != null)
			{
				string[] tokens = line.Split(separators, StringSplitOptions.None);

				if (test_rating_format == TestRatingFileFormat.WITH_RATINGS && tokens.Length < 4)
					throw new FormatException("Expected at least 4 columns: " + line);
				if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 3)
					throw new FormatException("Expected at least 3 columns: " + line);

				int user_id = user_mapping.ToInternalID(tokens[0]);
				int item_id = item_mapping.ToInternalID(tokens[1]);
				float rating = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? float.Parse(tokens[2], CultureInfo.InvariantCulture) : 0;
				long seconds = uint.Parse(tokens[seconds_pos], CultureInfo.InvariantCulture);

				var time = unix_epoch.AddSeconds(seconds);
				// NOTE(review): the local UTC offset is subtracted exactly as before this fix;
				// confirm that this is the intended time zone handling.
				var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
				time -= offset;

				ratings.Add(user_id, item_id, rating, time);
			}
			return ratings;
		}
Beispiel #7
0
		/// <summary>Predict the rating for every given user-item pair and write the results to a TextWriter</summary>
		/// <remarks>
		/// If <paramref name="line_format"/> is exactly "ranking", no per-rating lines are written.
		/// Instead, for every user that has at least one entry in <paramref name="ratings"/>,
		/// the user's rated items are passed as candidates to the per-user WritePredictions overload.
		/// </remarks>
		/// <param name="recommender">rating predictor</param>
		/// <param name="ratings">test cases</param>
		/// <param name="writer">the TextWriter to write the predictions to</param>
		/// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs; an <see cref="IdentityMapping"/> is used if null</param>
		/// <param name="line_format">a format string specifying the line format; {0} is the user ID, {1} the item ID, {2} the rating</param>
		/// <param name="header">if specified, write this string at the start of the output</param>
		public static void WritePredictions(
			this IRecommender recommender,
			IRatings ratings,
			TextWriter writer,
			IMapping user_mapping = null,
			IMapping item_mapping = null,
			string line_format = "{0}\t{1}\t{2}",
			string header = null)
		{
			user_mapping = user_mapping ?? new IdentityMapping();
			item_mapping = item_mapping ?? new IdentityMapping();

			if (header != null)
				writer.WriteLine(header);

			if (line_format != "ranking")
			{
				// one output line per rating, in the order the ratings are stored
				int position = 0;
				while (position < ratings.Count)
				{
					writer.WriteLine(
						line_format,
						user_mapping.ToOriginalID(ratings.Users[position]),
						item_mapping.ToOriginalID(ratings.Items[position]),
						recommender.Predict(ratings.Users[position], ratings.Items[position]).ToString(CultureInfo.InvariantCulture));
					position++;
				}
				return;
			}

			foreach (int user_id in ratings.AllUsers)
			{
				if (ratings.ByUser[user_id].Count <= 0)
					continue;

				// the candidate items are exactly the items this user has entries for
				var rated_items = new List<int>(ratings.ByUser[user_id].Select(index => ratings.Items[index]));
				recommender.WritePredictions(
					user_id,
					rated_items,
					new int[] { },
					ratings.ByUser[user_id].Count,
					writer,
					user_mapping, item_mapping);
			}
		}
Beispiel #8
0
        /// <summary>Read in implicit feedback data from a TextReader</summary>
        /// <remarks>
        /// Lines that are empty after trimming are skipped. Every other line is split on
        /// <c>Constants.SPLIT_CHARS</c>; its first two columns are taken as user ID and item ID.
        /// </remarks>
        /// <param name="reader">the TextReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">item <see cref="IMapping"/> object; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="ignore_first_line">if true, the first line is read and discarded</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="FormatException">
        /// a non-blank line has fewer than 2 columns, or its IDs could not be processed;
        /// in the latter case the original error is available as the inner exception
        /// </exception>
        public static IPosOnlyFeedback Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
        {
            if (user_mapping == null)
                user_mapping = new IdentityMapping();
            if (item_mapping == null)
                item_mapping = new IdentityMapping();
            if (ignore_first_line)
                reader.ReadLine();

            var feedback = new PosOnlyFeedback<SparseBooleanMatrix>();

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Trim().Length == 0)
                    continue;

                string[] tokens = line.Split(Constants.SPLIT_CHARS);

                if (tokens.Length < 2)
                    throw new FormatException("Expected at least 2 columns: " + line);

                try
                {
                    int user_id = user_mapping.ToInternalID(tokens[0]);
                    int item_id = item_mapping.ToInternalID(tokens[1]);
                    feedback.Add(user_id, item_id);
                }
                catch (Exception e)
                {
                    // keep the root cause attached so that the failure stays diagnosable
                    throw new FormatException(string.Format("Could not read line '{0}'", line), e);
                }
            }

            return feedback;
        }
Beispiel #9
0
        /// <summary>Read in rating data with time information from a TextReader</summary>
        /// <remarks>
        /// Empty lines are skipped. Every other line is split on <c>Constants.SPLIT_CHARS</c> and must have
        /// at least four columns: user ID, item ID, rating, and date/time.
        /// The date/time may be given as "yyyy-mm-dd hh:mm:ss" (optionally enclosed in double quotes,
        /// in which case it spans two columns), as "yyyy-mm-dd", as an unsigned integer interpreted
        /// as seconds since the Unix epoch, or in any other form accepted by DateTime.Parse
        /// with the invariant culture.
        /// Progress dots are written to standard error while reading.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="ignore_first_line">if true, the first line is read and discarded</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">a non-empty line has fewer than 4 columns, or a value could not be parsed</exception>
        public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null, bool ignore_first_line = false)
        {
            if (user_mapping == null)
                user_mapping = new IdentityMapping();
            if (item_mapping == null)
                item_mapping = new IdentityMapping();
            if (ignore_first_line)
                reader.ReadLine();

            var ratings = new MyMediaLite.Data.TimedRatings();
            var time_split_chars = new char[] { ' ', '-', ':' };

            // Count from the epoch itself: counting from year 1 and then adding 1969 years
            // misplaces leap days (year 4 is a leap year, 1973 is not) and shifts many dates by one day.
            var unix_epoch = new DateTime(1970, 1, 1);

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                    continue;

                string[] tokens = line.Split(Constants.SPLIT_CHARS);

                if (tokens.Length < 4)
                    throw new FormatException("Expected at least 4 columns: " + line);

                int user_id = user_mapping.ToInternalID(tokens[0]);
                int item_id = item_mapping.ToInternalID(tokens[1]);
                float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);

                // a quoted "date time" value was split into two columns; glue it back together and drop the quotes
                string date_string = tokens[3];
                if (tokens[3].StartsWith("\"") && tokens.Length > 4 && tokens[4].EndsWith("\""))
                {
                    date_string = tokens[3] + " " + tokens[4];
                    date_string = date_string.Substring(1, date_string.Length - 2);
                }

                DateTime time;
                uint seconds;
                if (date_string.Length == 19) // format "yyyy-mm-dd hh:mm:ss"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    time = new DateTime(
                        int.Parse(date_time_tokens[0]),
                        int.Parse(date_time_tokens[1]),
                        int.Parse(date_time_tokens[2]),
                        int.Parse(date_time_tokens[3]),
                        int.Parse(date_time_tokens[4]),
                        int.Parse(date_time_tokens[5]));
                }
                else if (date_string.Length == 10 && date_string[4] == '-') // format "yyyy-mm-dd"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    time = new DateTime(
                        int.Parse(date_time_tokens[0]),
                        int.Parse(date_time_tokens[1]),
                        int.Parse(date_time_tokens[2]));
                }
                else if (uint.TryParse(date_string, out seconds)) // unsigned integer value, interpreted as seconds since Unix epoch
                {
                    time = unix_epoch.AddSeconds(seconds);
                    // NOTE(review): the local UTC offset is subtracted exactly as before this fix;
                    // confirm that this is the intended time zone handling.
                    var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
                    time -= offset;
                }
                else
                    time = DateTime.Parse(date_string, CultureInfo.InvariantCulture);

                ratings.Add(user_id, item_id, rating, time);

                // progress indicator: one dot per 200,000 ratings, one line break per 12,000,000 ratings
                if (ratings.Count % 200000 == 199999)
                    Console.Error.Write(".");
                if (ratings.Count % 12000000 == 11999999)
                    Console.Error.WriteLine();
            }
            ratings.InitScale();
            return ratings;
        }
Beispiel #10
0
        /// <summary>Score candidate items for one user and write them, best first, to a TextWriter</summary>
        /// <remarks>
        /// The output is a single line of the form <c>user TAB [item:score,item:score,...]</c>.
        /// Items contained in <paramref name="ignore_items"/> are not scored.
        /// Without a limit, all candidates with a score above <see cref="float.MinValue"/> are sorted by score;
        /// with a limit, a bounded heap keeps only the best-scored items seen so far.
        /// </remarks>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="user_id">ID of the user to make recommendations for</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="ignore_items">list of items for which no predictions should be made</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs; an <see cref="IdentityMapping"/> is used if null</param>
        public static void WritePredictions(
            this IRecommender recommender,
            int user_id,
            System.Collections.Generic.IList<int> candidate_items,
            System.Collections.Generic.ICollection<int> ignore_items,
            int num_predictions,
            TextWriter writer,
            IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            System.Collections.Generic.IList<Pair<int, float>> ranked;

            user_mapping = user_mapping ?? new IdentityMapping();
            item_mapping = item_mapping ?? new IdentityMapping();

            if (num_predictions != -1)
            {
                var by_score = new DelegateComparer<Pair<int, float>>( (x, y) => x.Second.CompareTo(y.Second) );
                var best = new IntervalHeap<Pair<int, float>>(num_predictions, by_score);

                // once the heap is full, only scores above its current minimum can still enter
                float score_bound = float.MinValue;

                foreach (int candidate in candidate_items)
                {
                    if (ignore_items.Contains(candidate))
                        continue;

                    float score = recommender.Predict(user_id, candidate);
                    if (!(score > score_bound))
                        continue;

                    best.Add(new Pair<int, float>(candidate, score));
                    if (best.Count > num_predictions)
                    {
                        best.DeleteMin();
                        score_bound = best.FindMin().Second;
                    }
                }

                // empty the heap from the top to get the items in descending score order
                ranked = new Pair<int, float>[best.Count];
                for (int pos = 0; pos < ranked.Count; pos++)
                    ranked[pos] = best.DeleteMax();
            }
            else
            {
                ranked = candidate_items
                    .Where(candidate => !ignore_items.Contains(candidate))
                    .Select(candidate => new Pair<int, float>(candidate, recommender.Predict(user_id, candidate)))
                    .Where(scored => scored.Second > float.MinValue)
                    .OrderByDescending(scored => scored.Second)
                    .ToArray();
            }

            writer.Write("{0}\t[", user_mapping.ToOriginalID(user_id));
            for (int pos = 0; pos < ranked.Count; pos++)
            {
                // every entry but the first is preceded by a comma
                string entry_format = pos == 0 ? "{0}:{1}" : ",{0}:{1}";
                writer.Write(
                    entry_format,
                    item_mapping.ToOriginalID(ranked[pos].First),
                    ranked[pos].Second.ToString(CultureInfo.InvariantCulture));
            }
            writer.WriteLine("]");
        }
Beispiel #11
0
        /// <summary>Write the recommender's scored item list for one user to a TextWriter</summary>
        /// <remarks>
        /// The output is a single line of the form <c>user TAB [item:score,item:score,...]</c>,
        /// with the items in the order returned by the recommender's Recommend call.
        /// </remarks>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="user_id">ID of the user to make recommendations for</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="ignore_items">list of items for which no predictions should be made</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        /// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs; an <see cref="IdentityMapping"/> is used if null</param>
        /// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs; an <see cref="IdentityMapping"/> is used if null</param>
        public static void WritePredictions(
            this IRecommender recommender,
            int user_id,
            ICollection<int> candidate_items,
            ICollection<int> ignore_items,
            int num_predictions,
            TextWriter writer,
            IMapping user_mapping, IMapping item_mapping)
        {
            user_mapping = user_mapping ?? new IdentityMapping();
            item_mapping = item_mapping ?? new IdentityMapping();

            var ranked = recommender.Recommend(
                user_id, n:num_predictions,
                ignore_items:ignore_items, candidate_items:candidate_items);

            writer.Write("{0}\t[", user_mapping.ToOriginalID(user_id));
            for (int pos = 0; pos < ranked.Count; pos++)
            {
                // every entry but the first is preceded by a comma
                string entry_format = pos == 0 ? "{0}:{1}" : ",{0}:{1}";
                writer.Write(
                    entry_format,
                    item_mapping.ToOriginalID(ranked[pos].Item1),
                    ranked[pos].Item2.ToString(CultureInfo.InvariantCulture));
            }
            writer.WriteLine("]");
        }