示例#1
0
        /// <summary>Read a sparse binary relation from a StreamReader</summary>
        /// <remarks>
        /// Every non-empty line must hold exactly two integer entity IDs,
        /// separated by one tab or one space:
        /// ENTITY_ID whitespace ENTITY_ID
        /// Each such line marks one pair for which the relation holds.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the relation data</returns>
        /// <exception cref="IOException">a non-empty line does not split into exactly two columns</exception>
        public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
        {
            var relation = new SparseBooleanMatrix();
            var separators = new[] { '\t', ' ' };

            while (!reader.EndOfStream)
            {
                string current = reader.ReadLine();

                // only lines with content are parsed; empty ones are skipped
                if (current.Length > 0)
                {
                    string[] fields = current.Split(separators);

                    if (fields.Length != 2)
                        throw new IOException("Expected exactly two columns: " + current);

                    // both IDs go through the same mapping, first column first
                    int first_id  = mapping.ToInternalID(int.Parse(fields[0]));
                    int second_id = mapping.ToInternalID(int.Parse(fields[1]));

                    relation[first_id, second_id] = true;
                }
            }

            return relation;
        }
示例#2
0
        /// <summary>Parse binary relation data from a StreamReader</summary>
        /// <remarks>
        /// Input is line-oriented and sparse; each non-empty line names one related pair:
        /// ENTITY_ID whitespace ENTITY_ID
        /// where the separator is a single tab or space and both IDs are integers.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the relation data</returns>
        /// <exception cref="IOException">a non-empty line does not split into exactly two columns</exception>
        static public SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
        {
            var pairs = new SparseBooleanMatrix();
            char[] delimiters = { '\t', ' ' };

            while (!reader.EndOfStream)
            {
                string text = reader.ReadLine();

                // empty lines carry no data
                if (text.Length == 0)
                {
                    continue;
                }

                string[] columns = text.Split(delimiters);

                if (columns.Length == 2)
                {
                    int entity_a = mapping.ToInternalID(int.Parse(columns[0]));
                    int entity_b = mapping.ToInternalID(int.Parse(columns[1]));

                    pairs[entity_a, entity_b] = true;
                }
                else
                {
                    throw new IOException("Expected exactly two columns: " + text);
                }
            }

            return pairs;
        }
示例#3
0
        /// <summary>Read in implicit feedback data from a TextReader</summary>
        /// <remarks>
        /// Lines are split at tabs, spaces and commas. The first two columns are parsed as
        /// integer user and item IDs; further columns are ignored. Lines that are empty after
        /// trimming are skipped.
        /// </remarks>
        /// <param name="reader">the TextReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">a line splits into fewer than two columns</exception>
        public static IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var result = new PosOnlyFeedback<SparseBooleanMatrix>();
            char[] separators = { '\t', ' ', ',' };

            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                if (row.Trim().Length != 0)
                {
                    string[] fields = row.Split(separators);

                    if (fields.Length < 2)
                        throw new IOException("Expected at least two columns: " + row);

                    // arguments are evaluated left to right: user first, then item
                    result.Add(
                        user_mapping.ToInternalID(int.Parse(fields[0])),
                        item_mapping.ToInternalID(int.Parse(fields[1])));
                }
            }

            return result;
        }
示例#4
0
        /// <summary>Parse implicit (positive-only) feedback from a TextReader</summary>
        /// <remarks>
        /// Each line with content is split at tab, space or comma; column 0 is the integer
        /// user ID and column 1 the integer item ID. Extra columns are not looked at, and
        /// lines that are blank after trimming are skipped.
        /// </remarks>
        /// <param name="reader">the TextReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">a line splits into fewer than two columns</exception>
        static public IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var events = new PosOnlyFeedback<SparseBooleanMatrix>();
            var delimiters = new[] { '\t', ' ', ',' };

            while (true)
            {
                string entry = reader.ReadLine();

                // ReadLine yields null once the input is exhausted
                if (entry == null)
                {
                    break;
                }

                if (entry.Trim().Length == 0)
                {
                    continue;
                }

                string[] parts = entry.Split(delimiters);

                if (parts.Length < 2)
                {
                    throw new IOException("Expected at least two columns: " + entry);
                }

                int internal_user = user_mapping.ToInternalID(int.Parse(parts[0]));
                int internal_item = item_mapping.ToInternalID(int.Parse(parts[1]));

                events.Add(internal_user, internal_item);
            }

            return events;
        }
示例#5
0
        Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            // Reads rating triples line by line: column 0 is an integer user ID, column 1 an
            // integer item ID, column 2 the rating (parsed with the invariant culture).
            // Columns are separated by tab, space or comma; extra columns are ignored.
            var result = new Ratings();
            char[] separators = { '\t', ' ', ',' };

            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                // zero-length lines are skipped
                if (row.Length != 0)
                {
                    string[] fields = row.Split(separators);

                    if (fields.Length < 3)
                    {
                        throw new IOException("Expected at least three columns: " + row);
                    }

                    int    internal_user = user_mapping.ToInternalID(int.Parse(fields[0]));
                    int    internal_item = item_mapping.ToInternalID(int.Parse(fields[1]));
                    double value         = double.Parse(fields[2], CultureInfo.InvariantCulture);

                    result.Add(internal_user, internal_item, value);
                }
            }

            return result;
        }
示例#6
0
        /// <summary>Read in rating data from a TextReader</summary>
        /// <remarks>
        /// Each non-empty line is split at tab, space or comma and must provide at least
        /// three columns: integer user ID, integer item ID, and the rating value, which is
        /// parsed using the invariant culture.
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs</param>
        /// <param name="item_mapping">mapping object for item IDs</param>
        /// <returns>the rating data</returns>
        /// <exception cref="IOException">a line splits into fewer than three columns</exception>
        public static IRatings Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var collected = new Ratings();
            var delimiters = new[] { '\t', ' ', ',' };

            while (true)
            {
                string entry = reader.ReadLine();

                // a null line signals the end of the input
                if (entry == null)
                    break;

                if (entry.Length == 0)
                    continue;

                string[] parts = entry.Split(delimiters);

                if (parts.Length < 3)
                    throw new IOException("Expected at least three columns: " + entry);

                int internal_user = user_mapping.ToInternalID(int.Parse(parts[0]));
                int internal_item = item_mapping.ToInternalID(int.Parse(parts[1]));
                double score = double.Parse(parts[2], CultureInfo.InvariantCulture);

                collected.Add(internal_user, internal_item, score);
            }

            return collected;
        }
示例#7
0
        /// <summary>Read binary relation data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Column 0 and column 1 of every row are read as 32-bit integer entity IDs and
        /// translated with the same mapping; any further columns are ignored.
        /// </remarks>
        /// <param name="reader">an IDataReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the relation data</returns>
        /// <exception cref="IOException">the reader exposes fewer than two columns</exception>
        static public SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
        {
            if (reader.FieldCount < 2)
            {
                throw new IOException("Expected at least two columns.");
            }

            var matrix = new SparseBooleanMatrix();

            // Read() advances to the next row and returns true while a row is available,
            // so the loop must run on a true result (the condition used to be negated).
            while (reader.Read())
            {
                int entity1_id = mapping.ToInternalID(reader.GetInt32(0));
                // the second entity lives in column 1 (column 0 used to be read twice)
                int entity2_id = mapping.ToInternalID(reader.GetInt32(1));

                matrix[entity1_id, entity2_id] = true;
            }

            return(matrix);
        }
示例#8
0
        /// <summary>Read in static rating data from a TextReader</summary>
        /// <remarks>
        /// The rating container is chosen from <paramref name="rating_type"/>: BYTE and FLOAT
        /// select the byte and float variants, every other value the default static container.
        /// Each non-empty line is split at tab, space or comma and must provide at least three
        /// columns: integer user ID, integer item ID, and the rating (invariant culture).
        /// </remarks>
        /// <param name="reader">the <see cref="TextReader"/> to read from</param>
        /// <param name="size">the number of ratings in the file; forwarded to the container's constructor</param>
        /// <param name="user_mapping">mapping object for user IDs</param>
        /// <param name="item_mapping">mapping object for item IDs</param>
        /// <param name="rating_type">the data type to be used for storing the ratings</param>
        /// <returns>the rating data</returns>
        /// <exception cref="IOException">a line splits into fewer than three columns</exception>
        static public IRatings Read(TextReader reader, int size,
                                    IEntityMapping user_mapping, IEntityMapping item_mapping,
                                    RatingType rating_type)
        {
            // pick the storage class first; the comparisons run in the order BYTE, FLOAT
            IRatings store =
                rating_type == RatingType.BYTE  ? (IRatings) new StaticByteRatings(size)
              : rating_type == RatingType.FLOAT ? (IRatings) new StaticFloatRatings(size)
              : new StaticRatings(size);

            char[] separators = { '\t', ' ', ',' };

            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                // zero-length lines are skipped
                if (row.Length != 0)
                {
                    string[] fields = row.Split(separators);

                    if (fields.Length < 3)
                    {
                        throw new IOException("Expected at least three columns: " + row);
                    }

                    int    internal_user = user_mapping.ToInternalID(int.Parse(fields[0]));
                    int    internal_item = item_mapping.ToInternalID(int.Parse(fields[1]));
                    double value         = double.Parse(fields[2], CultureInfo.InvariantCulture);

                    store.Add(internal_user, internal_item, value);
                }
            }

            return store;
        }
示例#9
0
        /// <summary>Read in implicit feedback data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Column 0 of every row is read as the 32-bit integer user ID and column 1 as the
        /// 32-bit integer item ID; further columns are ignored.
        /// </remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">the reader exposes fewer than two columns</exception>
        static public IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var result = new PosOnlyFeedback<SparseBooleanMatrix>();

            bool has_both_columns = reader.FieldCount >= 2;
            if (!has_both_columns)
            {
                throw new IOException("Expected at least two columns.");
            }

            while (reader.Read())
            {
                // arguments are evaluated left to right: user column first, then item column
                result.Add(
                    user_mapping.ToInternalID(reader.GetInt32(0)),
                    item_mapping.ToInternalID(reader.GetInt32(1)));
            }

            return result;
        }
示例#10
0
        /// <summary>Read binary attribute data from a StreamReader</summary>
        /// <remarks>
        /// The expected (sparse) line format is:
        /// ENTITY_ID separator ATTRIBUTE_ID
        /// where the separator is one of the characters in Constants.SPLIT_CHARS.
        /// The entity ID is handed to the mapping as text; the attribute ID is parsed
        /// as an integer and used directly as the second matrix index.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the attribute data</returns>
        /// <exception cref="FormatException">a non-empty line does not split into exactly 2 columns</exception>
        public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
        {
            var attributes = new SparseBooleanMatrix();

            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                // empty lines carry no data
                if (row.Length > 0)
                {
                    string[] fields = row.Split(Constants.SPLIT_CHARS);

                    if (fields.Length != 2)
                        throw new FormatException("Expected exactly 2 columns: " + row);

                    int internal_entity = mapping.ToInternalID(fields[0]);
                    int attribute       = int.Parse(fields[1]);

                    attributes[internal_entity, attribute] = true;
                }
            }

            return attributes;
        }
示例#11
0
        /// <summary>Read binary attribute data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Column 0 of every row is read as a string entity ID and translated with the
        /// mapping; column 1 is read as a 32-bit integer attribute ID and used as is.
        /// Further columns are ignored.
        /// </remarks>
        /// <param name="reader">an IDataReader to be read from</param>
        /// <param name="mapping">the mapping object for the given entity type</param>
        /// <returns>the attribute data</returns>
        /// <exception cref="FormatException">the reader exposes fewer than 2 columns</exception>
        public static SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
        {
            // FormatException (a subclass of Exception) replaces the bare Exception thrown
            // before, so existing catch (Exception) handlers keep working.
            if (reader.FieldCount < 2)
                throw new FormatException("Expected at least 2 columns.");

            var matrix = new SparseBooleanMatrix();

            // Read() advances to the next row and returns true while a row is available,
            // so the loop must run on a true result (the condition used to be negated).
            while (reader.Read())
            {
                int entity_id = mapping.ToInternalID(reader.GetString(0));
                int attr_id   = reader.GetInt32(1);

                matrix[entity_id, attr_id] = true;
            }

            return matrix;
        }
示例#12
0
    /// <summary>Load all data used by the recommender and report the time spent loading</summary>
    /// <remarks>
    /// Inside a timed delegate this reads the training data, determines the relevant user
    /// and item sets, loads the attribute and relation files required by the interfaces the
    /// recommender implements, and obtains the test data either from a file or by splitting
    /// the training data according to test_ratio. The elapsed time is written to standard error.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items: taken from the given files if present,
            // otherwise all users/items of the training data
            if (relevant_users_file != null)
            {
                relevant_users = new HashSet <int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            }
            else
            {
                relevant_users = training_data.AllUsers;
            }
            if (relevant_items_file != null)
            {
                relevant_items = new HashSet <int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            }
            else
            {
                relevant_items = training_data.AllItems;
            }

            // every recommender except the Random one gets the training feedback assigned
            if (!(recommender is MyMediaLite.ItemRecommendation.Random))
            {
                ((ItemRecommender)recommender).Feedback = training_data;
            }

            // user attributes
            // NOTE(review): Usage(...) presumably prints a message and terminates - confirm
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                {
                    Usage("Recommender expects --user-attributes=FILE.");
                }
                else
                {
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
                }
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                {
                    Usage("Recommender expects --item-attributes=FILE.");
                }
                else
                {
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }
            // filtered evaluation keeps its own copy of the item attributes
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                {
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                }
                else
                {
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);                     // TODO move to DisplayDataStats
                }
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                // check the item relation file, which is the one read below
                // (user_relations_file was tested here before)
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);                     // TODO move to DisplayDataStats
                }
            }

            // test data: read from test_file when no split ratio is given,
            // otherwise split the training data into a train and a test part
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit <PosOnlyFeedback <SparseBooleanMatrix> >(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });

        // invariant culture keeps the decimal separator stable regardless of locale
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
示例#13
0
        /// <summary>Load implicit feedback from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// For every row, column 0 supplies the 32-bit integer user ID and column 1 the
        /// 32-bit integer item ID. Columns beyond the second are not accessed.
        /// </remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="IOException">the reader exposes fewer than two columns</exception>
        public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var events = new PosOnlyFeedback<SparseBooleanMatrix>();

            if (reader.FieldCount < 2)
            {
                throw new IOException("Expected at least two columns.");
            }

            // advance row by row until Read() reports that no row is left
            for (bool has_row = reader.Read(); has_row; has_row = reader.Read())
            {
                int external_user = reader.GetInt32(0);
                int internal_user = user_mapping.ToInternalID(external_user);

                int external_item = reader.GetInt32(1);
                int internal_item = item_mapping.ToInternalID(external_item);

                events.Add(internal_user, internal_item);
            }

            return events;
        }
示例#14
0
        /// <summary>Read in rating data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Columns 0 and 1 are accessed through string getters and supply the user and item
        /// IDs; column 2 is accessed through a float getter and supplies the rating. The
        /// getters are obtained once, before the first row is read.
        /// </remarks>
        /// <param name="reader">the <see cref="IDataReader"/> to read from</param>
        /// <param name="user_mapping">mapping object for user IDs</param>
        /// <param name="item_mapping">mapping object for item IDs</param>
        /// <returns>the rating data</returns>
        /// <exception cref="FormatException">the reader exposes fewer than 3 columns</exception>
        public static IRatings Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var collected = new Ratings();

            if (reader.FieldCount < 3)
            {
                throw new FormatException("Expected at least 3 columns.");
            }

            var read_user  = reader.GetStringGetter(0);
            var read_item  = reader.GetStringGetter(1);
            var read_value = reader.GetFloatGetter(2);

            while (reader.Read())
            {
                // arguments are evaluated left to right: user, item, then rating
                collected.Add(
                    user_mapping.ToInternalID(read_user()),
                    item_mapping.ToInternalID(read_item()),
                    read_value());
            }

            return collected;
        }
示例#15
0
        /// <summary>Read in implicit feedback data from an IDataReader, e.g. a database via DbDataReader</summary>
        /// <remarks>
        /// Columns 0 and 1 are accessed through string getters that are obtained once,
        /// before the first row is read; they supply the user ID and the item ID of each row.
        /// </remarks>
        /// <param name="reader">the IDataReader to be read from</param>
        /// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
        /// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
        /// <returns>a <see cref="IPosOnlyFeedback"/> object with the user-wise collaborative data</returns>
        /// <exception cref="FormatException">the reader exposes fewer than 2 columns</exception>
        public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
        {
            var result = new PosOnlyFeedback<SparseBooleanMatrix>();

            if (reader.FieldCount < 2)
            {
                throw new FormatException("Expected at least 2 columns.");
            }

            var read_user = reader.GetStringGetter(0);
            var read_item = reader.GetStringGetter(1);

            // advance row by row until Read() reports that no row is left
            for (bool has_row = reader.Read(); has_row; has_row = reader.Read())
            {
                int internal_user = user_mapping.ToInternalID(read_user());
                int internal_item = item_mapping.ToInternalID(read_item());

                result.Add(internal_user, internal_item);
            }

            return result;
        }