[Test()]
public void TestNonEmptyRowIDs()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 3)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }

    Assert.AreEqual(3, matrix.NonEmptyRowIDs.Count);

    var rowIDs = matrix.NonEmptyRowIDs;
    var rowIDsEnum = rowIDs.GetEnumerator();
    rowIDsEnum.MoveNext();
    Assert.AreEqual(0, rowIDsEnum.Current);
    rowIDsEnum.MoveNext();
    Assert.AreEqual(1, rowIDsEnum.Current);
    rowIDsEnum.MoveNext();
    Assert.AreEqual(4, rowIDsEnum.Current);
    Assert.IsFalse(rowIDsEnum.MoveNext());
}
[Test()]
public void TestNonEmptyColumnIDs()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 3)
        {
            matrix[1, i] = true;
            matrix[4, i] = true;
        }

    Assert.AreEqual(3, matrix.NonEmptyColumnIDs.Count);

    ICollection<int> colIDs = matrix.NonEmptyColumnIDs;
    var colIDsEnum = colIDs.GetEnumerator();
    colIDsEnum.MoveNext();
    Assert.AreEqual(0, colIDsEnum.Current);
    colIDsEnum.MoveNext();
    Assert.AreEqual(1, colIDsEnum.Current);
    colIDsEnum.MoveNext();
    Assert.AreEqual(4, colIDsEnum.Current);
    Assert.IsFalse(colIDsEnum.MoveNext());
}
/// <summary>Read binary attribute data from a StreamReader</summary>
/// <remarks>
/// The expected (sparse) line format is:
/// ENTITY_ID tab/space/comma ATTRIBUTE_ID
/// for the relations that hold.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
static public IBooleanMatrix Read(StreamReader reader, IMapping mapping)
{
    var matrix = new SparseBooleanMatrix();

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // ignore empty lines
        if (line.Length == 0)
            continue;

        string[] tokens = line.Split(Constants.SPLIT_CHARS);

        if (tokens.Length != 2)
            throw new FormatException("Expected exactly 2 columns: " + line);

        int entity_id = mapping.ToInternalID(tokens[0]);
        int attr_id = int.Parse(tokens[1]);

        matrix[entity_id, attr_id] = true;
    }

    return matrix;
}
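// Usage sketch for the reader above (hypothetical file name; assumes the
// surrounding AttributeData class and MyMediaLite's Mapping type):
//
//   var mapping = new Mapping();
//   using (var reader = new StreamReader("item-attributes.txt"))
//   {
//       IBooleanMatrix attributes = Read(reader, mapping);
//       Console.WriteLine("{0} entities with attributes", attributes.NonEmptyRowIDs.Count);
//   }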
[Test()]
public void TestCreate()
{
    var sparse_boolean_matrix = new SparseBooleanMatrix();
    sparse_boolean_matrix[0, 1] = true;
    sparse_boolean_matrix[0, 4] = true;
    sparse_boolean_matrix[1, 0] = true;
    sparse_boolean_matrix[1, 2] = true;
    sparse_boolean_matrix[1, 4] = true;
    sparse_boolean_matrix[3, 1] = true;
    sparse_boolean_matrix[3, 3] = true;
    sparse_boolean_matrix[3, 4] = true;

    var correlation_matrix = new BinaryCosine(sparse_boolean_matrix.NumberOfRows);
    correlation_matrix.ComputeCorrelations(sparse_boolean_matrix);

    Assert.AreEqual(4, correlation_matrix.NumberOfRows);
    Assert.IsTrue(correlation_matrix.IsSymmetric);

    Assert.AreEqual(1 / Math.Sqrt(6), correlation_matrix[0, 1], DELTA);
    Assert.AreEqual(1 / Math.Sqrt(6), correlation_matrix[1, 0], DELTA);
    Assert.AreEqual(1 / 3d, correlation_matrix[1, 3], DELTA);
    Assert.AreEqual(0f, correlation_matrix[2, 0]);
    Assert.AreEqual(0f, correlation_matrix[2, 1]);
    Assert.AreEqual(1f, correlation_matrix[2, 2]);
    Assert.AreEqual(0f, correlation_matrix[2, 3]);
    Assert.AreEqual(0f, correlation_matrix[0, 2]);
    Assert.AreEqual(0f, correlation_matrix[1, 2]);
    Assert.AreEqual(0f, correlation_matrix[3, 2]);
}
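// Where the expected values in this test come from (binary cosine over row sets):
//
//     cosine(a, b) = |a ∩ b| / sqrt(|a| * |b|)
//
// Row 0 = {1, 4} and row 1 = {0, 2, 4} share one entry, so 1 / sqrt(2 * 3) = 1 / sqrt(6).
// Row 1 = {0, 2, 4} and row 3 = {1, 3, 4} share one entry, so 1 / sqrt(3 * 3) = 1 / 3.
// Row 2 is empty, so its correlation with every other row is 0, and 1 with itself.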
/// <summary>Read binary attribute data from a StreamReader</summary>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
static public SparseBooleanMatrix Read(StreamReader reader, IEntityMapping mapping)
{
    var matrix = new SparseBooleanMatrix();
    var split_chars = new char[] { '\t', ' ' };

    string line;
    while (!reader.EndOfStream)
    {
        line = reader.ReadLine();

        // ignore empty lines
        if (line.Length == 0)
            continue;

        string[] tokens = line.Split(split_chars);

        if (tokens.Length != 2)
            throw new IOException("Expected exactly two columns: " + line);

        int entity_id = mapping.ToInternalID(int.Parse(tokens[0]));
        int attr_id = int.Parse(tokens[1]);

        matrix[entity_id, attr_id] = true;
    }

    return matrix;
}
[Test()]
public void TestCreateMatrix()
{
    var matrix = new SparseBooleanMatrix();
    var other_matrix = matrix.CreateMatrix(2, 2);
    Assert.IsInstanceOfType(matrix.GetType(), other_matrix);
}
[Test()]
public void TestNumberOfRows()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
    {
        matrix[i, 1] = true;
        matrix[i, 4] = true;
    }
    Assert.AreEqual(5, matrix.NumberOfRows);
}
// TODO generalize more to save code ...
// TODO generalize that normal protocol is just an instance of this? Only if w/o performance penalty ...

/// <summary>For a given user and the test dataset, return a dictionary of items filtered by attributes</summary>
/// <param name="user_id">the user ID</param>
/// <param name="test">the test dataset</param>
/// <param name="item_attributes">the item attributes to filter by</param>
/// <returns>a dictionary containing a mapping from attribute IDs to collections of item IDs</returns>
static public Dictionary<int, ICollection<int>> GetFilteredItems(int user_id, IPosOnlyFeedback test, SparseBooleanMatrix item_attributes)
{
    var filtered_items = new Dictionary<int, ICollection<int>>();

    foreach (int item_id in test.UserMatrix[user_id])
        foreach (int attribute_id in item_attributes[item_id])
            if (filtered_items.ContainsKey(attribute_id))
                filtered_items[attribute_id].Add(item_id);
            else
                filtered_items[attribute_id] = new HashSet<int>() { item_id };

    return filtered_items;
}
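// Usage sketch (hypothetical variable names; assumes a loaded IPosOnlyFeedback
// test set and an item-attribute matrix): for each attribute occurring on one
// of the user's test items, the result lists that user's test items carrying it.
//
//   Dictionary<int, ICollection<int>> filtered = GetFilteredItems(user_id, test_data, item_attributes);
//   foreach (var pair in filtered)
//       Console.WriteLine("attribute {0}: {1} test items", pair.Key, pair.Value.Count);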
[Test()]
public void TestNumberOfEntries()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 4)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = false;
        }
    Assert.AreEqual(3, matrix.NumberOfEntries);
}
public override void Train()
{
    filtered_items_by_user = new Dictionary<int, ICollection<int>>[MaxUserID + 1];
    items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

    Console.Error.WriteLine("max_user_id {0} max_item_id {1}", MaxUserID, MaxItemID);

    for (int u = 0; u < filtered_items_by_user.Length; u++)
        filtered_items_by_user[u] = ItemsFiltered.GetFilteredItems(u, Feedback, ItemAttributes);

    base.Train();
}
public MediaLiteRatingPredictor(MyMediaLite.IRecommender recommender, IEnumerable<Relation> relations)
    : this(recommender)
{
    if (recommender is SocialMF)
    {
        _relations = new SparseBooleanMatrix();
        foreach (var con in relations)
            _relations[_usersMap.ToInternalID(con.UserId), _usersMap.ToInternalID(con.ConnectedId)] = true;
        ((SocialMF) recommender).UserRelation = _relations;
    }
}
[Test()]
public void TestIsSymmetric()
{
    var matrix = new SparseBooleanMatrix();
    Assert.IsTrue(matrix.IsSymmetric);

    matrix[1, 1] = true;
    Assert.IsTrue(matrix.IsSymmetric);

    matrix[2, 1] = true;
    Assert.IsFalse(matrix.IsSymmetric);

    matrix[1, 2] = true;
    Assert.IsTrue(matrix.IsSymmetric);

    matrix[2, 1] = false;
    Assert.IsFalse(matrix.IsSymmetric);
}
[Test()]
public void TestNumEntriesByColumn()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 3)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }

    Assert.AreEqual(0, matrix.NumEntriesByColumn(0));
    Assert.AreEqual(3, matrix.NumEntriesByColumn(1));
    Assert.AreEqual(0, matrix.NumEntriesByColumn(2));
    Assert.AreEqual(0, matrix.NumEntriesByColumn(3));
    Assert.AreEqual(3, matrix.NumEntriesByColumn(4));
}
/// <summary>Read binary attribute data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <param name="reader">an IDataReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the attribute data</returns>
static public SparseBooleanMatrix Read(IDataReader reader, IEntityMapping mapping)
{
    if (reader.FieldCount < 2)
        throw new IOException("Expected at least two columns.");

    var matrix = new SparseBooleanMatrix();

    while (reader.Read())
    {
        int entity_id = mapping.ToInternalID(reader.GetInt32(0));
        int attr_id = reader.GetInt32(1);

        matrix[entity_id, attr_id] = true;
    }

    return matrix;
}
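// Usage sketch (hypothetical connection string and table/column names; any
// ADO.NET provider whose data reader implements IDataReader works the same way):
//
//   using (var connection = new SqlConnection(connection_string))
//   {
//       connection.Open();
//       using (var command = new SqlCommand("SELECT item_id, attribute_id FROM item_attributes", connection))
//       using (var reader = command.ExecuteReader())
//       {
//           SparseBooleanMatrix attributes = Read(reader, mapping);
//       }
//   }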
[Test()]
public void TestNonEmptyRows()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }

    Assert.IsTrue(matrix[0, 1]);

    IList<KeyValuePair<int, HashSet<int>>> nonEmptyRows = matrix.NonEmptyRows;
    Assert.AreEqual(4, nonEmptyRows.Count);
    // TODO test contents
}
[Test()]
public void TestGetEntriesByRow()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 3)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }

    Assert.AreEqual(2, matrix.GetEntriesByRow(0).Count);
    Assert.AreEqual(2, matrix.GetEntriesByRow(1).Count);
    Assert.AreEqual(0, matrix.GetEntriesByRow(2).Count);
    Assert.AreEqual(0, matrix.GetEntriesByRow(3).Count);
    Assert.AreEqual(2, matrix.GetEntriesByRow(4).Count);
}
[Test()]
public void TestCreate()
{
    // create test objects
    var sparse_boolean_matrix = new SparseBooleanMatrix();
    sparse_boolean_matrix[0, 1] = true;
    sparse_boolean_matrix[0, 4] = true;
    sparse_boolean_matrix[1, 0] = true;
    sparse_boolean_matrix[1, 2] = true;
    sparse_boolean_matrix[1, 4] = true;
    sparse_boolean_matrix[3, 1] = true;
    sparse_boolean_matrix[3, 3] = true;
    sparse_boolean_matrix[3, 4] = true;

    // test
    var correlation_matrix = BinaryCosine.Create(sparse_boolean_matrix);
    Assert.AreEqual(Math.Round(1 / Math.Sqrt(6), 4), Math.Round(correlation_matrix[0, 1], 4));
    Assert.AreEqual(Math.Round(1 / Math.Sqrt(6), 4), Math.Round(correlation_matrix[1, 0], 4));
    Assert.AreEqual(Math.Round(1 / 3d, 4), Math.Round(correlation_matrix[1, 3], 4));
}
[Test()]
public void TestComputeCorrelations()
{
    var sparse_boolean_matrix = new SparseBooleanMatrix();
    sparse_boolean_matrix[0, 1] = true;
    sparse_boolean_matrix[0, 4] = true;
    sparse_boolean_matrix[1, 0] = true;
    sparse_boolean_matrix[1, 2] = true;
    sparse_boolean_matrix[1, 4] = true;
    sparse_boolean_matrix[3, 1] = true;
    sparse_boolean_matrix[3, 3] = true;
    sparse_boolean_matrix[3, 4] = true;

    var correlation = new BinaryCosine(4);
    correlation.ComputeCorrelations(sparse_boolean_matrix);
    Assert.AreEqual(1 / Math.Sqrt(6), correlation[0, 1], DELTA);
    Assert.AreEqual(1 / Math.Sqrt(6), correlation[1, 0], DELTA);
    Assert.AreEqual(1 / 3d, correlation[1, 3], DELTA);
}
[Test()]
public void TestRemoveColumn()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 5; i++)
        if (i != 2 && i != 4)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }
    matrix[2, 2] = true;

    matrix.RemoveColumn(2);

    // removing column 2 shifts all higher column IDs down by one
    Assert.IsTrue(matrix[0, 3]);
    Assert.IsTrue(matrix[1, 3]);
    Assert.IsTrue(matrix[3, 3]);
    Assert.IsTrue(matrix[1, 1]);
}
/// <summary>Read binary relation data from an IDataReader, e.g. a database via DbDataReader</summary>
/// <param name="reader">an IDataReader to be read from</param>
/// <param name="mapping">the mapping object for the given entity type</param>
/// <returns>the relation data</returns>
static public IBooleanMatrix Read(IDataReader reader, IMapping mapping)
{
    if (reader.FieldCount < 2)
        throw new FormatException("Expected at least 2 columns.");

    var matrix = new SparseBooleanMatrix();

    Func<string> get_e1_id = reader.GetStringGetter(0);
    Func<string> get_e2_id = reader.GetStringGetter(1);

    while (reader.Read())
    {
        int entity1_id = mapping.ToInternalID(get_e1_id());
        int entity2_id = mapping.ToInternalID(get_e2_id());

        matrix[entity1_id, entity2_id] = true;
    }

    return matrix;
}
[Test()]
public void TestOverlapCount()
{
    var matrix = new SparseBooleanMatrix();
    matrix[2, 2] = true;
    matrix[2, 5] = true;
    matrix[4, 3] = true;
    matrix[4, 6] = true;
    matrix[5, 1] = true;
    matrix[5, 5] = true;

    var overlapMatrix = new SparseBooleanMatrix();
    overlapMatrix[2, 1] = true;
    overlapMatrix[2, 5] = true; // same entry
    overlapMatrix[4, 4] = true;
    overlapMatrix[4, 6] = true; // same entry
    overlapMatrix[5, 2] = true;
    overlapMatrix[5, 5] = true; // same entry

    Assert.AreEqual(3, matrix.Overlap(overlapMatrix));
}
[Test()]
public void TestRemoveColumns()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 7; i++)
        if (i != 2 && i != 4)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }
    matrix[2, 2] = true;
    matrix[2, 5] = true;
    matrix[4, 3] = true;

    int[] delete_columns = { 2, 4 };
    matrix.RemoveColumn(delete_columns);

    // test the new columns
    Assert.IsTrue(matrix[4, 2]);
    Assert.IsTrue(matrix[2, 3]);
    Assert.IsFalse(matrix[1, 3]);
    Assert.IsFalse(matrix[4, 3]);
}
///
public override void Train()
{
    base.Train();

    CreateSimilarityMatrix(Similarity);

    if (correlation is RatingCorrelationMatrix)
        ((RatingCorrelationMatrix) correlation).ComputeCorrelations(ratings, Entity);

    if (correlation is BinaryDataCorrelationMatrix)
    {
        this.entity_data = new SparseBooleanMatrix();
        if (Entity == EntityType.USER)
            for (int i = 0; i < ratings.Count; i++)
                entity_data[ratings.Users[i], ratings.Items[i]] = true;
        else if (Entity == EntityType.ITEM)
            for (int i = 0; i < ratings.Count; i++)
                entity_data[ratings.Items[i], ratings.Users[i]] = true;
        else
            throw new ArgumentException("Unknown entity type: " + Entity);

        ((BinaryDataCorrelationMatrix) correlation).ComputeCorrelations(entity_data);
    }
}
[Test()]
public void TestTranspose()
{
    var matrix = new SparseBooleanMatrix();
    for (int i = 0; i < 7; i++)
        if (i != 2 && i != 4)
        {
            matrix[i, 1] = true;
            matrix[i, 4] = true;
        }
    matrix[2, 2] = true;
    matrix[2, 5] = true;
    matrix[4, 3] = true;

    // transpose the matrix
    var transposed_matrix = (IBooleanMatrix) matrix.Transpose();

    // test the transposed matrix
    Assert.IsTrue(transposed_matrix[1, 0]);
    Assert.IsTrue(transposed_matrix[4, 6]);
    Assert.IsFalse(transposed_matrix[3, 1]);
    Assert.IsFalse(transposed_matrix[5, 4]);
}
static void LoadData()
{
    TimeSpan loading_time = Utils.MeasureTime(delegate() {
        // training data
        training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

        // relevant users and items
        if (relevant_users_file != null)
            relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
        else
            relevant_users = training_data.AllUsers;
        if (relevant_items_file != null)
            relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        else
            relevant_items = training_data.AllItems;

        if (!(recommender is MyMediaLite.ItemRecommendation.Random))
            ((ItemRecommender) recommender).Feedback = training_data;

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file == null)
                Usage("Recommender expects --user-attributes=FILE.");
            else
                ((IUserAttributeAwareRecommender) recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file == null)
                Usage("Recommender expects --item-attributes=FILE.");
            else
                ((IItemAttributeAwareRecommender) recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }

        if (filtered_eval)
        {
            if (item_attributes_file == null)
                Usage("--filtered-evaluation expects --item-attributes=FILE.");
            else
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }

        // user relation
        if (recommender is IUserRelationAwareRecommender)
        {
            if (user_relations_file == null)
            {
                Usage("Recommender expects --user-relation=FILE.");
            }
            else
            {
                ((IUserRelationAwareRecommender) recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender) recommender).NumUsers); // TODO move to DisplayDataStats
            }
        }

        // item relation
        if (recommender is IItemRelationAwareRecommender)
        {
            if (item_relations_file == null)
            {
                Usage("Recommender expects --item-relation=FILE.");
            }
            else
            {
                ((IItemRelationAwareRecommender) recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender) recommender).NumItems); // TODO move to DisplayDataStats
            }
        }

        // test data
        if (test_ratio == 0)
        {
            if (test_file != null)
                test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
        }
        else
        {
            var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
            training_data = split.Train[0];
            test_data = split.Test[0];
        }
    });
    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
}
protected void Run(string[] args)
{
    Console.WriteLine("WISER-RecSys started");

    options = new OptionSet() {
        // string-valued options
        { "arquivo=",             v => arquivo = v },
        { "measures=",            v => measures = v },
        { "recommender-options=", v => recommender_options += " " + v },
        { "help",                 v => show_help = v != null },
    };

    eval_measures = ItemRecommendationEvaluationResults.DefaultMeasuresToShow;
    IList<string> extra_args = options.Parse(args);

    if (show_help)
        Usage(0);

    // evaluation measures
    if (measures != null)
        eval_measures = measures.Split(' ', ',');

    // run yours
    // training_file = "training.data";
    test_file = "test.data";
    training_partial_file = "training.partial.data";
    test_partial_file = "test.partial.data";

    for (int i = 0; i < arquivos.Length; i++)
    {
        MyMediaLite.Random.Seed = 1;
        item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";
        user_mapping.Add(new Mapping());
        item_mapping.Add(new Mapping());

        // set up recommender
        recommenders.Add("BPRMFAttr".CreateItemRecommender());
        recommenders[i].Configure(recommender_options, (string msg) => { Console.Error.WriteLine(msg); Environment.Exit(-1); });

        // item attributes
        if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
            Abort("Recommender expects --item-attributes=FILE.");
        if (item_attributes_file != null)
            item_attributes.Add(AttributeData.Read(item_attributes_file, item_mapping[i]));
        if (recommenders[i] is IItemAttributeAwareRecommender)
            ((IItemAttributeAwareRecommender) recommenders[i]).ItemAttributes = item_attributes[i];

        IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
        if (recommenders[i] is IUserAttributeAwareRecommender)
            ((IUserAttributeAwareRecommender) recommenders[i]).UserAttributes = lista_vazia;

        // training data
        training_data.Add(ItemData.Read(training_file, user_mapping[i], item_mapping[i], false));
        test_data.Add(ItemData.Read(test_file, user_mapping[i], item_mapping[i], false));
        test_users.Add(test_data[i].AllUsers);

        // probe data
        training_probe_data.Add(ItemData.Read(training_partial_file, user_mapping[i], item_mapping[i], false));
        test_probe_data.Add(ItemData.Read(test_partial_file, user_mapping[i], item_mapping[i], false));

        if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
            ((ItemRecommender) recommenders[i]).Feedback = training_probe_data[i];

        // train
        Console.WriteLine("Starting probe training");
        var train_time_span = Wrap.MeasureTime(delegate() { recommenders[i].Train(); });
        Console.WriteLine("training_time " + train_time_span + " ");
    }

    Evaluation evaluation = new Evaluation(recommenders, test_probe_data, training_probe_data);

    // probe learning
    Console.WriteLine("Probe learn started");
    TimeSpan time_span = Wrap.MeasureTime(delegate() { evaluation.EvaluateProbe(test_users, user_mapping, item_mapping); });
    Console.WriteLine(" Probe learn time: " + time_span);

    for (int i = 0; i < arquivos.Length; i++)
    {
        MyMediaLite.Random.Seed = 1;
        item_attributes_file = "movie_" + arquivos[i] + ".dat_saida";

        // set up recommender
        recommenders[i] = "BPRMFAttr".CreateItemRecommender();
        recommenders[i].Configure(recommender_options, (string msg) => { Console.Error.WriteLine(msg); Environment.Exit(-1); });

        // item attributes
        if (recommenders[i] is IItemAttributeAwareRecommender && item_attributes_file == null)
            Abort("Recommender expects --item-attributes=FILE.");
        if (recommenders[i] is IItemAttributeAwareRecommender)
            ((IItemAttributeAwareRecommender) recommenders[i]).ItemAttributes = item_attributes[i];

        IBooleanMatrix lista_vazia = new SparseBooleanMatrix();
        if (recommenders[i] is IUserAttributeAwareRecommender)
            ((IUserAttributeAwareRecommender) recommenders[i]).UserAttributes = lista_vazia;

        if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
            ((ItemRecommender) recommenders[i]).Feedback = training_data[i];

        // train
        Console.WriteLine("Now the normal training");
        var train_time_span = Wrap.MeasureTime(delegate() { recommenders[i].Train(); });
        Console.WriteLine("training_time " + train_time_span + " ");
    }

    var results = evaluation.Evaluate(test_data, training_data, test_users, user_mapping, item_mapping);
    foreach (EvaluationResults result in results)
        Console.WriteLine(result.ToString());

    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
private void IterateBatch(IList<int> rating_indices, bool update_user, bool update_item)
{
    SetupLoss();

    SparseBooleanMatrix user_reverse_connections = (SparseBooleanMatrix) user_connections.Transpose();

    // I. compute gradients
    var user_factors_gradient = new Matrix<float>(user_factors.dim1, user_factors.dim2);
    var item_factors_gradient = new Matrix<float>(item_factors.dim1, item_factors.dim2);
    var user_bias_gradient    = new float[user_factors.dim1];
    var item_bias_gradient    = new float[item_factors.dim1];

    // I.1 prediction error
    foreach (int index in rating_indices)
    {
        int user_id = ratings.Users[index];
        int item_id = ratings.Items[index];

        // prediction
        float score = global_bias + user_bias[user_id] + item_bias[item_id];
        score += DataType.MatrixExtensions.RowScalarProduct(user_factors, user_id, item_factors, item_id);
        double sig_score = 1 / (1 + Math.Exp(-score));
        float prediction = (float) (MinRating + sig_score * rating_range_size);
        float error = prediction - ratings[index];

        float gradient_common = compute_gradient_common(sig_score, error);

        user_bias_gradient[user_id] += gradient_common;
        item_bias_gradient[item_id] += gradient_common;

        for (int f = 0; f < NumFactors; f++)
        {
            float u_f = user_factors[user_id, f];
            float i_f = item_factors[item_id, f];

            user_factors_gradient.Inc(user_id, f, gradient_common * i_f);
            item_factors_gradient.Inc(item_id, f, gradient_common * u_f);
        }
    }

    // I.2 L2 regularization
    //     biases
    for (int u = 0; u < user_bias_gradient.Length; u++)
        user_bias_gradient[u] += user_bias[u] * RegU * BiasReg;
    for (int i = 0; i < item_bias_gradient.Length; i++)
        item_bias_gradient[i] += item_bias[i] * RegI * BiasReg;
    //     latent factors
    for (int u = 0; u < user_factors_gradient.dim1; u++)
        for (int f = 0; f < user_factors_gradient.dim2; f++)
            user_factors_gradient.Inc(u, f, user_factors[u, f] * RegU);
    for (int i = 0; i < item_factors_gradient.dim1; i++)
        for (int f = 0; f < item_factors_gradient.dim2; f++)
            item_factors_gradient.Inc(i, f, item_factors[i, f] * RegI);

    // I.3 social network regularization -- see eq. (13) in the paper
    if (SocialRegularization != 0)
        for (int u = 0; u < user_factors_gradient.dim1; u++)
        {
            var sum_connections = new float[NumFactors];
            float bias_sum_connections = 0;
            int num_connections = user_connections[u].Count;
            foreach (int v in user_connections[u])
            {
                bias_sum_connections += user_bias[v];
                for (int f = 0; f < sum_connections.Length; f++)
                    sum_connections[f] += user_factors[v, f];
            }
            if (num_connections != 0)
            {
                user_bias_gradient[u] += social_regularization * (user_bias[u] - bias_sum_connections / num_connections);
                for (int f = 0; f < user_factors_gradient.dim2; f++)
                    user_factors_gradient.Inc(u, f, social_regularization * (user_factors[u, f] - sum_connections[f] / num_connections));
            }

            foreach (int v in user_reverse_connections[u])
            {
                float trust_v = (float) 1 / user_connections[v].Count;
                float neg_trust_times_reg = -social_regularization * trust_v;

                float bias_diff = 0;
                var factor_diffs = new float[NumFactors];
                foreach (int w in user_connections[v])
                {
                    bias_diff -= user_bias[w];
                    for (int f = 0; f < factor_diffs.Length; f++)
                        factor_diffs[f] -= user_factors[w, f];
                }

                bias_diff *= trust_v; // normalize
                bias_diff += user_bias[v];
                user_bias_gradient[u] += neg_trust_times_reg * bias_diff;

                for (int f = 0; f < factor_diffs.Length; f++)
                {
                    factor_diffs[f] *= trust_v; // normalize
                    factor_diffs[f] += user_factors[v, f];
                    user_factors_gradient.Inc(u, f, neg_trust_times_reg * factor_diffs[f]);
                }
            }
        }

    // II. apply gradient descent step
    if (update_user)
    {
        for (int user_id = 0; user_id < user_factors_gradient.dim1; user_id++)
            user_bias[user_id] -= user_bias_gradient[user_id] * LearnRate * BiasLearnRate;
        user_factors_gradient.Multiply(-LearnRate);
        user_factors.Inc(user_factors_gradient);
    }
    if (update_item)
    {
        for (int item_id = 0; item_id < item_factors_gradient.dim1; item_id++)
            item_bias[item_id] -= item_bias_gradient[item_id] * LearnRate * BiasLearnRate;
        item_factors_gradient.Multiply(-LearnRate);
        item_factors.Inc(item_factors_gradient);
    }
}
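// Background for step I.3, as a sketch (this is our reading of eq. (13) in the
// SocialMF paper by Jamali and Ester; the symbols below are not part of this
// code base): the regularizer penalizes the distance between a user's factor
// vector and the average of their trusted neighbors' vectors,
//
//     (beta / 2) * sum_u || p_u - (1 / |N_u|) * sum_{v in N_u} p_v ||^2
//
// Differentiating w.r.t. p_u gives two kinds of terms: beta * (p_u - neighbor
// average) for u itself (the num_connections branch above), and, for every
// user v who trusts u, -beta * (1 / |N_v|) * (p_v - average of v's neighbors)
// (the reverse-connections loop above). The bias terms are regularized the
// same way.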
/// <summary>Evaluation for rankings of filtered items</summary>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="item_attributes">the item attributes to be used for filtering</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
static public Dictionary<string, double> Evaluate(
    IItemRecommender recommender,
    IPosOnlyFeedback test,
    IPosOnlyFeedback train,
    SparseBooleanMatrix item_attributes,
    ICollection<int> relevant_users,
    ICollection<int> relevant_items)
{
    if (train.Overlap(test) > 0)
        Console.Error.WriteLine("WARNING: Overlapping train and test data");

    SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

    // compute evaluation measures
    double auc_sum     = 0;
    double map_sum     = 0;
    double prec_5_sum  = 0;
    double prec_10_sum = 0;
    double prec_15_sum = 0;
    double ndcg_sum    = 0;

    // for counting the users and the evaluation lists
    int num_lists = 0;
    int num_users = 0;
    int last_user_id = -1;

    foreach (int user_id in relevant_users)
    {
        var filtered_items = GetFilteredItems(user_id, test, item_attributes);

        foreach (int attribute_id in filtered_items.Keys)
        {
            // TODO optimize this a bit, currently it is quite naive
            var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
            relevant_filtered_items.IntersectWith(relevant_items);

            var correct_items = new HashSet<int>(filtered_items[attribute_id]);
            correct_items.IntersectWith(relevant_filtered_items);

            // the number of items that are really relevant for this user
            var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
            relevant_items_in_train.IntersectWith(relevant_filtered_items);
            int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count;

            // skip all users that have 0 or #relevant_filtered_items test items
            if (correct_items.Count == 0)
                continue;
            if (num_eval_items - correct_items.Count == 0)
                continue;

            // counting stats
            num_lists++;
            if (last_user_id != user_id)
            {
                last_user_id = user_id;
                num_users++;
            }

            // evaluation
            int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

            auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
            map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
            ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
            prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
            prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
            prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

            if (prediction.Length != relevant_filtered_items.Count)
                throw new Exception("Not all items have been ranked.");

            if (num_lists % 1000 == 0)
                Console.Error.Write(".");
            if (num_lists % 20000 == 0)
                Console.Error.WriteLine();
        }
    }

    var result = new Dictionary<string, double>();
    result.Add("AUC",       auc_sum / num_lists);
    result.Add("MAP",       map_sum / num_lists);
    result.Add("NDCG",      ndcg_sum / num_lists);
    result.Add("prec@5",    prec_5_sum / num_lists);
    result.Add("prec@10",   prec_10_sum / num_lists);
    result.Add("prec@15",   prec_15_sum / num_lists);
    result.Add("num_users", num_users);
    result.Add("num_lists", num_lists);
    result.Add("num_items", relevant_items.Count);

    return result;
}
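// Usage sketch (hypothetical variable names; assumes the recommender, test and
// training feedback, and item attribute matrix are set up as elsewhere in this
// section):
//
//   var results = Evaluate(recommender, test_data, training_data,
//                          item_attributes, test_data.AllUsers, training_data.AllItems);
//   foreach (var measure in results.Keys)
//       Console.WriteLine("{0} {1}", measure, results[measure]);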
///
public override void LearnAttributeToFactorMapping()
{
    random = Util.Random.GetInstance();

    // create helper data structure
    this.data_item = new SparseBooleanMatrix();
    for (int i = 0; i < ratings.Count; i++)
        data_item[ratings.Items[i], ratings.Users[i]] = true;

    // create attribute-to-factor weight matrix
    // (account for regression bias term, and the item bias that we want to model)
    this.attribute_to_factor = new Matrix<double>(NumItemAttributes + 1, NumFactors + 1);

    // store the results of the different runs in the following array
    var old_attribute_to_factor = new Matrix<double>[num_init_mapping];

    Console.Error.WriteLine("Will use {0} examples ...", num_iter_mapping * MaxItemID);

    var old_rmse_per_factor = new double[num_init_mapping][];

    for (int h = 0; h < num_init_mapping; h++)
    {
        MatrixUtils.InitNormal(attribute_to_factor, InitMean, InitStdev);
        Console.Error.WriteLine("----");

        for (int i = 0; i < num_iter_mapping * MaxItemID; i++)
            IterateMapping();
        old_attribute_to_factor[h] = new Matrix<double>(attribute_to_factor);
        old_rmse_per_factor[h] = ComputeMappingFit();
    }

    var min_rmse_per_factor = new double[NumFactors + 1];
    for (int i = 0; i <= NumFactors; i++)
        min_rmse_per_factor[i] = Double.MaxValue;
    var best_factor_init = new int[NumFactors + 1];

    // find the best factor mappings
    for (int i = 0; i < num_init_mapping; i++)
        for (int j = 0; j <= NumFactors; j++)
            if (old_rmse_per_factor[i][j] < min_rmse_per_factor[j])
            {
                min_rmse_per_factor[j] = old_rmse_per_factor[i][j];
                best_factor_init[j] = i;
            }

    // set the best weight combinations for each factor mapping
    for (int i = 0; i <= NumFactors; i++)
    {
        Console.Error.WriteLine("Factor {0}, pick {1}", i, best_factor_init[i]);
        attribute_to_factor.SetColumn(i, old_attribute_to_factor[best_factor_init[i]].GetColumn(i));
    }
}