public void TestRatioSplit()
		{
			// Fixture: users 0 and 1 each rate items 0..3, with strictly increasing dates per user.
			float[] values_user0 = { 5.0f, 4.5f, 5.0f, 4.5f };
			DateTime[] times_user0 = {
				new DateTime(2011, 10, 31), new DateTime(2011, 11, 1),
				new DateTime(2011, 11, 3), new DateTime(2011, 11, 4)
			};
			float[] values_user1 = { 1.0f, 2.5f, 1.0f, 2.5f };
			DateTime[] times_user1 = {
				new DateTime(2011, 10, 31), new DateTime(2011, 11, 2),
				new DateTime(2011, 12, 1), new DateTime(2011, 12, 4)
			};

			var data = new TimedRatings();
			for (int item = 0; item < values_user0.Length; item++)
				data.Add(0, item, values_user0[item], times_user0[item]);
			for (int item = 0; item < values_user1.Length; item++)
				data.Add(1, item, values_user1[item], times_user1[item]);

			// Ratio 0.25: expect 3 training ratings and 1 test rating per user.
			var quarter_split = new RatingsPerUserChronologicalSplit(data, 0.25);
			var quarter_train = quarter_split.Train[0];
			var quarter_test = quarter_split.Test[0];
			Assert.AreEqual(6, quarter_train.Count);
			Assert.AreEqual(2, quarter_test.Count);
			Assert.AreEqual(3, quarter_train.ByUser[0].Count);
			Assert.AreEqual(3, quarter_train.ByUser[1].Count);
			Assert.AreEqual(1, quarter_test.ByUser[0].Count);
			Assert.AreEqual(1, quarter_test.ByUser[1].Count);

			// Ratio 0.5: expect 2 training ratings and 2 test ratings per user.
			var half_split = new RatingsPerUserChronologicalSplit(data, 0.5);
			var half_train = half_split.Train[0];
			var half_test = half_split.Test[0];
			Assert.AreEqual(4, half_train.Count);
			Assert.AreEqual(4, half_test.Count);
			Assert.AreEqual(2, half_train.ByUser[0].Count);
			Assert.AreEqual(2, half_train.ByUser[1].Count);
			Assert.AreEqual(2, half_test.ByUser[0].Count);
			Assert.AreEqual(2, half_test.ByUser[1].Count);
		}
예제 #2
0
        /// <summary>Read in rating data from a TextReader</summary>
        /// <remarks>
        /// Every line is split at "::" and must contain at least four columns: user ID, item ID,
        /// rating value, and a timestamp given as an unsigned number of seconds counted from 1970-01-01.
        /// The current time zone's UTC offset for the resulting time is subtracted before the rating is stored.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">thrown if a line has fewer than 4 columns</exception>
        public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null)
        {
            if (user_mapping == null)
                user_mapping = new IdentityMapping();
            if (item_mapping == null)
                item_mapping = new IdentityMapping();

            var ratings = new TimedRatings();

            string[] separators = { "::" };
            var epoch = new DateTime(1970, 1, 1);
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                string[] tokens = line.Split(separators, StringSplitOptions.None);

                if (tokens.Length < 4)
                    throw new FormatException(string.Format("Expected at least 4 columns: {0}", line));

                int user_id = user_mapping.ToInternalID(tokens[0]);
                int item_id = item_mapping.ToInternalID(tokens[1]);
                float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);
                long seconds = uint.Parse(tokens[3], CultureInfo.InvariantCulture);

                // Add the seconds to the epoch directly. The earlier form,
                // new DateTime(ticks).AddYears(1969), did the calendar arithmetic starting at year 1,
                // where leap years fall in different places than after 1970, so timestamps between
                // 29 Feb of a leap year and the following 28 Feb came out one day late.
                var time = epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

                // NOTE(review): the offset is subtracted exactly as before; whether the stored value
                // is meant to be UTC or local time cannot be told from here -- confirm.
                var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
                time -= offset;

                ratings.Add(user_id, item_id, rating, time);
            }
            return ratings;
        }
		public void TestTimeSplit()
		{
			// The three distinct dates used by the fixture and the expectations.
			var oct31 = new DateTime(2011, 10, 31);
			var nov1 = new DateTime(2011, 11, 1);
			var nov2 = new DateTime(2011, 11, 2);

			var data = new TimedRatings();
			data.Add(0, 0, 5.0f, oct31);
			data.Add(0, 1, 4.5f, nov1);
			data.Add(1, 0, 1.0f, oct31);
			data.Add(1, 1, 2.5f, nov2);

			// Split point Nov 2: only the rating dated Nov 2 is expected in the test set.
			var late_split = new RatingsChronologicalSplit(data, nov2);
			var late_train = late_split.Train[0];
			var late_test = late_split.Test[0];
			Assert.AreEqual(3, late_train.Count);
			Assert.AreEqual(1, late_test.Count);
			Assert.AreEqual(2, late_train.ByUser[0].Count);
			Assert.AreEqual(1, late_train.ByUser[1].Count);
			Assert.AreEqual(0, late_test.ByUser[0].Count);
			Assert.AreEqual(1, late_test.ByUser[1].Count);
			Assert.AreEqual(oct31, late_train.EarliestTime);
			Assert.AreEqual(nov1, late_train.LatestTime);
			Assert.AreEqual(nov2, late_test.EarliestTime);
			Assert.AreEqual(nov2, late_test.LatestTime);

			// Split point Nov 1: the ratings dated Nov 1 and Nov 2 are expected in the test set.
			var early_split = new RatingsChronologicalSplit(data, nov1);
			var early_train = early_split.Train[0];
			var early_test = early_split.Test[0];
			Assert.AreEqual(2, early_train.Count);
			Assert.AreEqual(2, early_test.Count);
			Assert.AreEqual(1, early_train.ByUser[0].Count);
			Assert.AreEqual(1, early_train.ByUser[1].Count);
			Assert.AreEqual(1, early_test.ByUser[0].Count);
			Assert.AreEqual(1, early_test.ByUser[1].Count);
			Assert.AreEqual(oct31, early_train.EarliestTime);
			Assert.AreEqual(oct31, early_train.LatestTime);
			Assert.AreEqual(nov1, early_test.EarliestTime);
			Assert.AreEqual(nov2, early_test.LatestTime);
		}
예제 #4
0
        /// <summary>Create a timed rating dataset filled with random entries</summary>
        /// <param name="num_users">exclusive upper bound passed to the generator for user IDs</param>
        /// <param name="num_items">exclusive upper bound passed to the generator for item IDs</param>
        /// <param name="num_ratings">number of ratings to add</param>
        /// <returns>the generated ratings; each value is 1 plus a draw from Next(5), stamped with DateTime.Now</returns>
        public static IRatings CreateRandomTimedRatings(int num_users, int num_items, int num_ratings)
        {
            var generator = MyMediaLite.Random.GetInstance();
            var result = new TimedRatings();

            // Draw order per rating is user, item, value -- kept fixed so a seeded generator
            // yields the same sequence.
            for (int remaining = num_ratings; remaining > 0; remaining--)
            {
                int user = generator.Next(num_users);
                int item = generator.Next(num_items);
                int value = generator.Next(5) + 1;
                result.Add(user, item, value, DateTime.Now);
            }

            return result;
        }
예제 #5
0
		/// <summary>Read in rating data from a TextReader</summary>
		/// <remarks>
		/// Every line is split at "::". With ratings, the columns are user ID, item ID, rating, timestamp;
		/// without ratings, they are user ID, item ID, timestamp, and the rating is stored as 0.
		/// The timestamp is an unsigned number of seconds counted from 1970-01-01; the current time zone's
		/// UTC offset for the resulting time is subtracted before the rating is stored.
		/// </remarks>
		/// <param name="reader">the <see cref="TextReader"/> to read from</param>
		/// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
		/// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
		/// <param name="test_rating_format">whether there is a rating column in each line or not</param>
		/// <returns>the rating data</returns>
		/// <exception cref="FormatException">thrown if a line has fewer columns than the format requires</exception>
		public static ITimedRatings Read(
			TextReader reader,
			IMapping user_mapping = null, IMapping item_mapping = null,
			TestRatingFileFormat test_rating_format = TestRatingFileFormat.WITH_RATINGS)
		{
			if (user_mapping == null)
				user_mapping = new IdentityMapping();
			if (item_mapping == null)
				item_mapping = new IdentityMapping();

			var ratings = new TimedRatings();

			string[] separators = { "::" };
			var epoch = new DateTime(1970, 1, 1);
			string line;
			// The timestamp column moves one position to the left when there is no rating column.
			int seconds_pos = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? 3 : 2;

			while ((line = reader.ReadLine()) != null)
			{
				string[] tokens = line.Split(separators, StringSplitOptions.None);

				if (test_rating_format == TestRatingFileFormat.WITH_RATINGS && tokens.Length < 4)
					throw new FormatException("Expected at least 4 columns: " + line);
				if (test_rating_format == TestRatingFileFormat.WITHOUT_RATINGS && tokens.Length < 3)
					throw new FormatException("Expected at least 3 columns: " + line);

				int user_id = user_mapping.ToInternalID(tokens[0]);
				int item_id = item_mapping.ToInternalID(tokens[1]);
				float rating = test_rating_format == TestRatingFileFormat.WITH_RATINGS ? float.Parse(tokens[2], CultureInfo.InvariantCulture) : 0;
				long seconds = uint.Parse(tokens[seconds_pos], CultureInfo.InvariantCulture);

				// Add the seconds to the epoch directly. The earlier form,
				// new DateTime(ticks).AddYears(1969), did the calendar arithmetic starting at year 1,
				// where leap years fall in different places than after 1970, so timestamps between
				// 29 Feb of a leap year and the following 28 Feb came out one day late.
				var time = epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);

				// NOTE(review): the offset is subtracted exactly as before; whether the stored value
				// is meant to be UTC or local time cannot be told from here -- confirm.
				var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
				time -= offset;

				ratings.Add(user_id, item_id, rating, time);
			}
			return ratings;
		}
        /// <summary>Read in rating data from a TextReader</summary>
        /// <remarks>
        /// Empty lines are skipped. Every other line is split with Constants.SPLIT_CHARS and must contain
        /// at least four columns: user ID, item ID, rating, date/time. The date/time column is accepted as
        /// "yyyy-mm-dd hh:mm:ss" (optionally wrapped in double quotes and spread over two columns),
        /// "yyyy-mm-dd", an unsigned number of seconds counted from 1970-01-01, or anything else that
        /// DateTime.Parse understands with the invariant culture.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
        /// <param name="ignore_first_line">if true, ignore the first line</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">thrown if a non-empty line has fewer than 4 columns</exception>
        public static ITimedRatings Read(TextReader reader, IMapping user_mapping = null, IMapping item_mapping = null, bool ignore_first_line = false)
        {
            if (user_mapping == null)
            {
                user_mapping = new IdentityMapping();
            }
            if (item_mapping == null)
            {
                item_mapping = new IdentityMapping();
            }
            if (ignore_first_line)
            {
                reader.ReadLine();
            }

            var ratings          = new MyMediaLite.Data.TimedRatings();
            var time_split_chars = new char[] { ' ', '-', ':' };
            var epoch            = new DateTime(1970, 1, 1);

            string line;

            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }

                string[] tokens = line.Split(Constants.SPLIT_CHARS);

                if (tokens.Length < 4)
                {
                    throw new FormatException("Expected at least 4 columns: " + line);
                }

                int    user_id     = user_mapping.ToInternalID(tokens[0]);
                int    item_id     = item_mapping.ToInternalID(tokens[1]);
                float  rating      = float.Parse(tokens[2], CultureInfo.InvariantCulture);
                string date_string = tokens[3];
                // A quoted "date time" value is cut in two by the column split: re-join the halves
                // and strip the surrounding quotes.
                if (tokens[3].StartsWith("\"") && tokens.Length > 4 && tokens[4].EndsWith("\""))
                {
                    date_string = tokens[3] + " " + tokens[4];
                    date_string = date_string.Substring(1, date_string.Length - 2);
                }

                uint seconds;
                if (date_string.Length == 19)                 // format "yyyy-mm-dd hh:mm:ss"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    ratings.Add(
                        user_id, item_id, rating,
                        new DateTime(
                            int.Parse(date_time_tokens[0], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[1], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[2], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[3], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[4], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[5], CultureInfo.InvariantCulture)));
                }
                else if (date_string.Length == 10 && date_string[4] == '-')                 // format "yyyy-mm-dd"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    ratings.Add(
                        user_id, item_id, rating,
                        new DateTime(
                            int.Parse(date_time_tokens[0], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[1], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[2], CultureInfo.InvariantCulture)));
                }
                else if (uint.TryParse(date_string, NumberStyles.Integer, CultureInfo.InvariantCulture, out seconds))                 // unsigned integer value, interpreted as seconds since Unix epoch
                {
                    // Add the seconds to the epoch directly. The earlier form,
                    // new DateTime(ticks).AddYears(1969), did the calendar arithmetic starting at year 1,
                    // where leap years fall in different places than after 1970, so timestamps between
                    // 29 Feb of a leap year and the following 28 Feb came out one day late.
                    var time   = epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);
                    // NOTE(review): the offset is subtracted exactly as before; whether the stored value
                    // is meant to be UTC or local time cannot be told from here -- confirm.
                    var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
                    ratings.Add(user_id, item_id, rating, time - offset);
                }
                else
                {
                    ratings.Add(user_id, item_id, rating, DateTime.Parse(date_string, CultureInfo.InvariantCulture));
                }

                // Progress output on stderr: a dot per 200000 ratings, a line break per 12000000.
                if (ratings.Count % 200000 == 199999)
                {
                    Console.Error.Write(".");
                }
                if (ratings.Count % 12000000 == 11999999)
                {
                    Console.Error.WriteLine();
                }
            }
            ratings.InitScale();
            return ratings;
        }
예제 #7
0
        /// <summary>Read in rating data from a TextReader</summary>
        /// <remarks>
        /// Empty lines are skipped. Every other line is split with Constants.SPLIT_CHARS and must contain
        /// at least four columns: user ID, item ID, rating, date/time. The date/time column is accepted as
        /// "yyyy-mm-dd hh:mm:ss" (optionally wrapped in double quotes and spread over two columns),
        /// "yyyy-mm-dd", an unsigned number of seconds counted from 1970-01-01, or anything else that
        /// DateTime.Parse understands with the invariant culture.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs; an IdentityMapping is used if null</param>
        /// <param name="item_mapping">mapping object for item IDs; an IdentityMapping is used if null</param>
        /// <param name="ignore_first_line">if true, ignore the first line</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">thrown if a non-empty line has fewer than 4 columns</exception>
        public static ITimedRatings Read(TextReader reader, IEntityMapping user_mapping = null, IEntityMapping item_mapping = null, bool ignore_first_line = false)
        {
            if (user_mapping == null)
                user_mapping = new IdentityMapping();
            if (item_mapping == null)
                item_mapping = new IdentityMapping();
            if (ignore_first_line)
                reader.ReadLine();

            var ratings = new MyMediaLite.Data.TimedRatings();
            var time_split_chars = new char[] { ' ', '-', ':' };
            var epoch = new DateTime(1970, 1, 1);

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                    continue;

                string[] tokens = line.Split(Constants.SPLIT_CHARS);

                if (tokens.Length < 4)
                    throw new FormatException("Expected at least 4 columns: " + line);

                int user_id = user_mapping.ToInternalID(tokens[0]);
                int item_id = item_mapping.ToInternalID(tokens[1]);
                float rating = float.Parse(tokens[2], CultureInfo.InvariantCulture);
                string date_string = tokens[3];
                // A quoted "date time" value is cut in two by the column split: re-join the halves
                // and strip the surrounding quotes.
                if (tokens[3].StartsWith("\"") && tokens.Length > 4 && tokens[4].EndsWith("\""))
                {
                    date_string = tokens[3] + " " + tokens[4];
                    date_string = date_string.Substring(1, date_string.Length - 2);
                }

                uint seconds;
                if (date_string.Length == 19) // format "yyyy-mm-dd hh:mm:ss"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    ratings.Add(
                        user_id, item_id, rating,
                        new DateTime(
                            int.Parse(date_time_tokens[0], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[1], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[2], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[3], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[4], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[5], CultureInfo.InvariantCulture)));
                }
                else if (date_string.Length == 10 && date_string[4] == '-') // format "yyyy-mm-dd"
                {
                    var date_time_tokens = date_string.Split(time_split_chars);
                    ratings.Add(
                        user_id, item_id, rating,
                        new DateTime(
                            int.Parse(date_time_tokens[0], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[1], CultureInfo.InvariantCulture),
                            int.Parse(date_time_tokens[2], CultureInfo.InvariantCulture)));
                }
                else if (uint.TryParse(date_string, NumberStyles.Integer, CultureInfo.InvariantCulture, out seconds)) // unsigned integer value, interpreted as seconds since Unix epoch
                {
                    // Add the seconds to the epoch directly. The earlier form,
                    // new DateTime(ticks).AddYears(1969), did the calendar arithmetic starting at year 1,
                    // where leap years fall in different places than after 1970, so timestamps between
                    // 29 Feb of a leap year and the following 28 Feb came out one day late.
                    var time = epoch.AddTicks(seconds * TimeSpan.TicksPerSecond);
                    // NOTE(review): the offset is subtracted exactly as before; whether the stored value
                    // is meant to be UTC or local time cannot be told from here -- confirm.
                    var offset = TimeZone.CurrentTimeZone.GetUtcOffset(time);
                    ratings.Add(user_id, item_id, rating, time - offset);
                }
                else
                    ratings.Add(user_id, item_id, rating, DateTime.Parse(date_string, CultureInfo.InvariantCulture));

                // Progress output on stderr: a dot per 200000 ratings, a line break per 12000000.
                if (ratings.Count % 200000 == 199999)
                    Console.Error.Write(".");
                if (ratings.Count % 12000000 == 11999999)
                    Console.Error.WriteLine();
            }
            ratings.InitScale();
            return ratings;
        }