/// <summary>Parse a binary relation between entities from line-oriented text</summary>
/// <remarks>
/// Every non-empty line must consist of exactly two integer entity IDs separated by
/// a single tab or space:
///   ENTITY_ID whitespace ENTITY_ID
/// Each such line marks one pair for which the relation holds.
/// </remarks>
/// <param name="reader">the StreamReader that supplies the lines</param>
/// <param name="mapping">mapping object used to translate both IDs of a line to internal IDs</param>
/// <returns>a sparse boolean matrix with a true entry for every pair that was read</returns>
/// <exception cref="IOException">a non-empty line does not split into exactly two columns</exception>
public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
    var separators = new[] { '\t', ' ' };
    var relation = new SparseBooleanMatrix();

    while (!reader.EndOfStream)
    {
        string current = reader.ReadLine();

        // blank lines carry no data
        if (current.Length == 0)
            continue;

        string[] columns = current.Split(separators);
        if (columns.Length != 2)
            throw new IOException("Expected exactly two columns: " + current);

        // map the left ID first, then the right one
        int row = mapping.ToInternalID(int.Parse(columns[0]));
        int col = mapping.ToInternalID(int.Parse(columns[1]));

        relation[row, col] = true;
    }

    return relation;
}
/// <summary>Load the pairs of a binary entity relation from a text stream</summary>
/// <remarks>
/// Sparse format, one pair per line, the two integer IDs separated by one tab or one space:
///   ENTITY_ID whitespace ENTITY_ID
/// Lines of length zero are skipped; any other line that does not yield exactly
/// two columns is rejected.
/// </remarks>
/// <param name="reader">a StreamReader positioned at the start of the relation data</param>
/// <param name="mapping">the mapping object applied to both entity IDs</param>
/// <returns>the relation as a sparse boolean matrix</returns>
/// <exception cref="IOException">a line has a column count other than two</exception>
static public SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
    char[] delimiters = { '\t', ' ' };
    var result = new SparseBooleanMatrix();

    while (true)
    {
        if (reader.EndOfStream)
            break;

        string text = reader.ReadLine();
        if (text.Length > 0)
        {
            string[] parts = text.Split(delimiters);
            if (parts.Length != 2)
                throw new IOException("Expected exactly two columns: " + text);

            int first  = mapping.ToInternalID(int.Parse(parts[0]));
            int second = mapping.ToInternalID(int.Parse(parts[1]));
            result[first, second] = true;
        }
    }

    return result;
}
/// <summary>Build positive-only feedback from user/item pairs given as text lines</summary>
/// <remarks>
/// Columns are split on tab, space and comma. The first two columns are parsed as the
/// integer user and item IDs; further columns are ignored. Lines that are empty after
/// trimming are skipped.
/// </remarks>
/// <param name="reader">the TextReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object holding every (user, item) pair read</returns>
/// <exception cref="IOException">a line splits into fewer than two columns</exception>
public static IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var separators = new[] { '\t', ' ', ',' };
    var result = new PosOnlyFeedback<SparseBooleanMatrix>();

    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
    {
        if (row.Trim().Length == 0)
            continue;

        string[] fields = row.Split(separators);
        if (fields.Length < 2)
            throw new IOException("Expected at least two columns: " + row);

        // the user ID is mapped before the item ID
        int user = user_mapping.ToInternalID(int.Parse(fields[0]));
        int item = item_mapping.ToInternalID(int.Parse(fields[1]));
        result.Add(user, item);
    }

    return result;
}
/// <summary>Read user/item interaction pairs from a TextReader</summary>
/// <remarks>
/// A line is tokenized on tab, space and comma; token 0 is the user ID and token 1 the
/// item ID, both integers. Whitespace-only lines are ignored, extra tokens are not used.
/// </remarks>
/// <param name="reader">the TextReader providing one interaction per line</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the collected interactions</returns>
/// <exception cref="IOException">a line yields fewer than two tokens</exception>
static public IPosOnlyFeedback Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    char[] delimiters = { '\t', ' ', ',' };
    var interactions = new PosOnlyFeedback<SparseBooleanMatrix>();

    string text;
    while ((text = reader.ReadLine()) != null)
    {
        bool is_blank = text.Trim().Length == 0;
        if (!is_blank)
        {
            string[] parts = text.Split(delimiters);
            if (parts.Length < 2)
                throw new IOException("Expected at least two columns: " + text);

            // arguments are evaluated left to right: user first, then item
            interactions.Add(
                user_mapping.ToInternalID(int.Parse(parts[0])),
                item_mapping.ToInternalID(int.Parse(parts[1])));
        }
    }

    return interactions;
}
// NOTE(review): the declaration below is truncated in this view -- its modifiers and
// return type are missing; the body returns a Ratings object.
//
// Reads rating triples from a TextReader, one per line:
//   - lines of length zero are skipped;
//   - each remaining line is split on tab, space and comma, and must yield at least
//     three tokens, otherwise an IOException is thrown;
//   - token 0 and token 1 are parsed as integers and translated via user_mapping and
//     item_mapping respectively; token 2 is parsed as a double using the invariant
//     culture, so the decimal separator does not depend on the current locale;
//   - every triple is added to a new Ratings object, which is returned at the end.
Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping) { var ratings = new Ratings(); var split_chars = new char[] { '\t', ' ', ',' }; string line; while ((line = reader.ReadLine()) != null) { if (line.Length == 0) { continue; } string[] tokens = line.Split(split_chars); if (tokens.Length < 3) { throw new IOException("Expected at least three columns: " + line); } int user_id = user_mapping.ToInternalID(int.Parse(tokens[0])); int item_id = item_mapping.ToInternalID(int.Parse(tokens[1])); double rating = double.Parse(tokens[2], CultureInfo.InvariantCulture); ratings.Add(user_id, item_id, rating); } return(ratings); }
/// <summary>Parse (user, item, rating) triples from a TextReader</summary>
/// <remarks>
/// Lines are split on tab, space and comma. The first two columns are integer IDs, the
/// third is the rating, parsed with the invariant culture. Zero-length lines are skipped
/// and columns beyond the third are ignored.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs</param>
/// <param name="item_mapping">mapping object for item IDs</param>
/// <returns>the rating data</returns>
/// <exception cref="IOException">a line splits into fewer than three columns</exception>
public static IRatings Read(TextReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var separators = new[] { '\t', ' ', ',' };
    var result = new Ratings();

    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
    {
        if (row.Length == 0)
            continue;

        string[] fields = row.Split(separators);
        if (fields.Length < 3)
            throw new IOException("Expected at least three columns: " + row);

        int user = user_mapping.ToInternalID(int.Parse(fields[0]));
        int item = item_mapping.ToInternalID(int.Parse(fields[1]));
        // invariant culture: '.' is always the decimal separator
        double value = double.Parse(fields[2], CultureInfo.InvariantCulture);

        result.Add(user, item, value);
    }

    return result;
}
/// <summary>Read binary relation data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Column 0 and column 1 of every row are read as 32-bit integer entity IDs; each row
/// marks one pair for which the relation holds. Additional columns are ignored.
/// </remarks>
/// <param name="reader">an IDataReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the relation data</returns>
/// <exception cref="IOException">the reader exposes fewer than two columns</exception>
public static SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
{
    if (reader.FieldCount < 2)
        throw new IOException("Expected at least two columns.");

    var matrix = new SparseBooleanMatrix();

    // IDataReader.Read() returns true while a row is available; the previous
    // negated condition meant no row was ever processed correctly.
    while (reader.Read())
    {
        int entity1_id = mapping.ToInternalID(reader.GetInt32(0));
        // the second entity lives in column 1 (column 0 was read twice before,
        // which could only ever produce diagonal entries)
        int entity2_id = mapping.ToInternalID(reader.GetInt32(1));

        matrix[entity1_id, entity2_id] = true;
    }

    return matrix;
}
/// <summary>Parse rating triples from a TextReader into a pre-sized rating container</summary>
/// <remarks>
/// The container type follows <paramref name="rating_type"/>: StaticByteRatings for BYTE,
/// StaticFloatRatings for FLOAT, StaticRatings for anything else. Lines are split on tab,
/// space and comma; the first three columns are user ID, item ID and rating (invariant
/// culture). Zero-length lines are skipped.
/// </remarks>
/// <param name="reader">the <see cref="TextReader"/> to read from</param>
/// <param name="size">the number of ratings in the file</param>
/// <param name="user_mapping">mapping object for user IDs</param>
/// <param name="item_mapping">mapping object for item IDs</param>
/// <param name="rating_type">the data type to be used for storing the ratings</param>
/// <returns>the rating data</returns>
/// <exception cref="IOException">a line splits into fewer than three columns</exception>
static public IRatings Read(TextReader reader, int size, IEntityMapping user_mapping, IEntityMapping item_mapping, RatingType rating_type)
{
    // pick the storage class for the requested rating type
    IRatings result =
        rating_type == RatingType.BYTE  ? (IRatings) new StaticByteRatings(size)
      : rating_type == RatingType.FLOAT ? (IRatings) new StaticFloatRatings(size)
      :                                   (IRatings) new StaticRatings(size);

    char[] delimiters = { '\t', ' ', ',' };

    string text;
    while ((text = reader.ReadLine()) != null)
    {
        if (text.Length != 0)
        {
            string[] parts = text.Split(delimiters);
            if (parts.Length < 3)
                throw new IOException("Expected at least three columns: " + text);

            int user = user_mapping.ToInternalID(int.Parse(parts[0]));
            int item = item_mapping.ToInternalID(int.Parse(parts[1]));
            double value = double.Parse(parts[2], CultureInfo.InvariantCulture);

            result.Add(user, item, value);
        }
    }

    return result;
}
/// <summary>Collect positive-only feedback from the rows of an IDataReader, e.g. a DbDataReader</summary>
/// <remarks>
/// Column 0 is read as the integer user ID and column 1 as the integer item ID of each row.
/// </remarks>
/// <param name="reader">the IDataReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with one entry per row read</returns>
/// <exception cref="IOException">the reader exposes fewer than two columns</exception>
static public IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var interactions = new PosOnlyFeedback<SparseBooleanMatrix>();

    bool has_both_columns = reader.FieldCount >= 2;
    if (!has_both_columns)
        throw new IOException("Expected at least two columns.");

    while (reader.Read())
    {
        // arguments are evaluated left to right: user column first, then item column
        interactions.Add(
            user_mapping.ToInternalID(reader.GetInt32(0)),
            item_mapping.ToInternalID(reader.GetInt32(1)));
    }

    return interactions;
}
/// <summary>Parse binary entity attributes from a StreamReader</summary>
/// <remarks>
/// Sparse format, one (entity, attribute) pair per line:
///   ENTITY_ID separator ATTRIBUTE_ID
/// Lines are split on the characters in Constants.SPLIT_CHARS. The entity ID is handed
/// to the mapping as a string, the attribute ID is parsed as an integer and used as is.
/// Zero-length lines are skipped.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
/// <exception cref="FormatException">a line does not split into exactly two columns</exception>
public static SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
    var attributes = new SparseBooleanMatrix();

    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
    {
        // blank lines carry no data
        if (row.Length == 0)
            continue;

        string[] fields = row.Split(Constants.SPLIT_CHARS);
        if (fields.Length != 2)
            throw new FormatException("Expected exactly 2 columns: " + row);

        // the entity is mapped before the attribute column is parsed
        int entity = mapping.ToInternalID(fields[0]);
        int attribute = int.Parse(fields[1]);

        attributes[entity, attribute] = true;
    }

    return attributes;
}
/// <summary>Read binary attribute data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Column 0 of every row is read as a string entity ID and translated through
/// <paramref name="mapping"/>; column 1 is read as a 32-bit integer attribute ID and
/// used unchanged. Additional columns are ignored.
/// </remarks>
/// <param name="reader">an IDataReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
/// <exception cref="FormatException">the reader exposes fewer than two columns</exception>
public static SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
{
    // FormatException (still an Exception for existing catch blocks) instead of the
    // bare Exception base type
    if (reader.FieldCount < 2)
        throw new FormatException("Expected at least 2 columns.");

    var matrix = new SparseBooleanMatrix();

    // IDataReader.Read() returns true while a row is available; the previous
    // negated condition meant no row was ever processed correctly.
    while (reader.Read())
    {
        int entity_id = mapping.ToInternalID(reader.GetString(0));
        int attr_id   = reader.GetInt32(1);

        matrix[entity_id, attr_id] = true;
    }

    return matrix;
}
/// <summary>Load training data, relevant users/items, attribute and relation data, and test data</summary>
/// <remarks>
/// All file names are combined with data_dir. The complete load is timed with
/// Utils.MeasureTime and the elapsed seconds are written to standard error.
/// NOTE(review): Usage(...) presumably prints a message and terminates the program -- confirm.
/// </remarks>
static void LoadData()
{
    TimeSpan loading_time = Utils.MeasureTime(delegate() {
        // training data
        training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

        // relevant users and items: taken from file if given, otherwise everything in the training data
        if (relevant_users_file != null)
            relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
        else
            relevant_users = training_data.AllUsers;

        if (relevant_items_file != null)
            relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        else
            relevant_items = training_data.AllItems;

        // every recommender except Random receives the training feedback
        if (!(recommender is MyMediaLite.ItemRecommendation.Random))
            ((ItemRecommender)recommender).Feedback = training_data;

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file == null)
                Usage("Recommender expects --user-attributes=FILE.");
            else
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file == null)
                Usage("Recommender expects --item-attributes=FILE.");
            else
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }

        // filtered evaluation needs the item attributes as well
        if (filtered_eval)
        {
            if (item_attributes_file == null)
                Usage("--filtered-evaluation expects --item-attributes=FILE.");
            else
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }

        // user relation
        if (recommender is IUserRelationAwareRecommender)
        {
            if (user_relations_file == null)
            {
                Usage("Recommender expects --user-relation=FILE.");
            }
            else
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
            }
        }

        // item relation
        if (recommender is IItemRelationAwareRecommender)
        {
            // Check the item relation file here. The previous test looked at
            // user_relations_file, so a missing --item-relation was not reported
            // (and a null path reached Path.Combine), while a missing user relation
            // file wrongly triggered the usage message.
            if (item_relations_file == null)
            {
                Usage("Recommender expects --item-relation=FILE.");
            }
            else
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
            }
        }

        // test data: either read from file, or split off from the training data
        if (test_ratio == 0)
        {
            if (test_file != null)
                test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data     = split.Test[0];
        }
    });

    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
}
/// <summary>Read user/item interaction rows from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Each row contributes one interaction: the integer in column 0 is the user ID,
/// the integer in column 1 the item ID.
/// </remarks>
/// <param name="reader">the IDataReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with the interactions read</returns>
/// <exception cref="IOException">the reader exposes fewer than two columns</exception>
public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var result = new PosOnlyFeedback<SparseBooleanMatrix>();

    if (reader.FieldCount < 2)
    {
        throw new IOException("Expected at least two columns.");
    }

    while (reader.Read())
    {
        // the user column is read and mapped before the item column
        int user = user_mapping.ToInternalID(reader.GetInt32(0));
        int item = item_mapping.ToInternalID(reader.GetInt32(1));
        result.Add(user, item);
    }

    return result;
}
/// <summary>Read (user, item, rating) rows from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// Accessors for columns 0 and 1 (as strings) and column 2 (as float) are obtained once
/// through GetStringGetter/GetFloatGetter before the first row is read, and then invoked
/// for every row.
/// </remarks>
/// <param name="reader">the <see cref="IDataReader"/> to read from</param>
/// <param name="user_mapping">mapping object for user IDs</param>
/// <param name="item_mapping">mapping object for item IDs</param>
/// <returns>the rating data</returns>
/// <exception cref="FormatException">the reader exposes fewer than three columns</exception>
public static IRatings Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var result = new Ratings();

    if (reader.FieldCount < 3)
    {
        throw new FormatException("Expected at least 3 columns.");
    }

    Func<string> read_user   = reader.GetStringGetter(0);
    Func<string> read_item   = reader.GetStringGetter(1);
    Func<float>  read_rating = reader.GetFloatGetter(2);

    while (reader.Read())
    {
        // arguments are evaluated left to right: user, item, rating
        result.Add(
            user_mapping.ToInternalID(read_user()),
            item_mapping.ToInternalID(read_item()),
            read_rating());
    }

    return result;
}
/// <summary>Collect positive-only feedback from an IDataReader, e.g. a database via DbDataReader</summary>
/// <remarks>
/// String accessors for column 0 (user ID) and column 1 (item ID) are obtained once
/// through GetStringGetter before the first row is read, and then invoked for every row.
/// </remarks>
/// <param name="reader">the IDataReader to be read from</param>
/// <param name="user_mapping">user <see cref="IEntityMapping"/> object</param>
/// <param name="item_mapping">item <see cref="IEntityMapping"/> object</param>
/// <returns>a <see cref="IPosOnlyFeedback"/> object with one entry per row read</returns>
/// <exception cref="FormatException">the reader exposes fewer than two columns</exception>
public static IPosOnlyFeedback Read(IDataReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping)
{
    var interactions = new PosOnlyFeedback<SparseBooleanMatrix>();

    bool has_both_columns = reader.FieldCount >= 2;
    if (!has_both_columns)
        throw new FormatException("Expected at least 2 columns.");

    Func<string> next_user = reader.GetStringGetter(0);
    Func<string> next_item = reader.GetStringGetter(1);

    while (reader.Read())
    {
        // the user is mapped before the item
        int user = user_mapping.ToInternalID(next_user());
        int item = item_mapping.ToInternalID(next_item());
        interactions.Add(user, item);
    }

    return interactions;
}