示例#1
0
    /// <summary>
    /// Loads the training data, the relevant user/item sets, any attribute and relation data
    /// required by the configured recommender, and the test data (read from a file or split
    /// off the training data). The elapsed loading time is written to standard error.
    /// </summary>
    /// <remarks>
    /// When a recommender needs an attribute/relation file that was not supplied,
    /// <c>Usage</c> is called with a message naming the missing option.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items: read from file if given, otherwise fall back to everything in the training data
            if (relevant_users_file != null)
            {
                relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            }
            else
            {
                relevant_users = training_data.AllUsers;
            }
            if (relevant_items_file != null)
            {
                relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            }
            else
            {
                relevant_items = training_data.AllItems;
            }

            // every recommender except the random baseline receives the training feedback
            if (!(recommender is MyMediaLite.ItemRecommendation.Random))
            {
                ((ItemRecommender)recommender).Feedback = training_data;
            }

            // user attributes
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                {
                    Usage("Recommender expects --user-attributes=FILE.");
                }
                else
                {
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
                }
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                {
                    Usage("Recommender expects --item-attributes=FILE.");
                }
                else
                {
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }

            // filtered evaluation needs the item attributes as well, independent of the recommender type
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                {
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                }
                else
                {
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
                }
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                // check the item relation file (not the user relation file), since that is the one read below
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
                }
            }

            // test data: either read from a file (optional), or split off the training data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
示例#2
0
    /// <summary>
    /// Loads the training data, the relevant user/item sets, any attribute and relation data
    /// required by the configured recommender, and the test data (read from a file or split
    /// off the training data). The elapsed loading time is written to standard error.
    /// </summary>
    /// <remarks>
    /// When a recommender needs an attribute/relation file that was not supplied,
    /// <c>Usage</c> is called with a message naming the missing option.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items: read from file if given, otherwise use everything in the training data
            if (relevant_users_file != null)
                relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            else
                relevant_users = training_data.AllUsers;
            if (relevant_items_file != null)
                relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            else
                relevant_items = training_data.AllItems;

            // every recommender except the random baseline receives the training feedback
            if (! (recommender is MyMediaLite.ItemRecommendation.Random))
                ((ItemRecommender)recommender).Feedback = training_data;

            // user attributes
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                    Usage("Recommender expects --user-attributes=FILE.");
                else
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                    Usage("Recommender expects --item-attributes=FILE.");
                else
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }

            // filtered evaluation needs the item attributes as well, independent of the recommender type
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                else
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
                }
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                // check the item relation file (not the user relation file), since that is the one read below
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
                }
            }

            // test data: either read from a file (optional), or split off the training data
            if (test_ratio == 0)
            {
                if (test_file != null)
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
            }
            else
            {
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
    /// <summary>
    /// Loads training and test feedback (after running the base class loader), optionally
    /// transposes everything for user prediction, hands the training data to all item
    /// recommenders, determines the test users and candidate items, and finally dumps the
    /// training and test matrices to "training.data" and "test.data" in the working directory.
    /// </summary>
    /// <remarks>
    /// Loading time and memory usage are written to standard error. The file dumps are not
    /// included in the measured loading time. Test data is optional: it stays null when
    /// <c>test_ratio</c> is 0 and no test file is configured, in which case "test.data" is not written.
    /// </remarks>
    protected override void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            base.LoadData();

            // training data; a rating threshold (if set) selects the threshold-based reader
            training_data = double.IsNaN(rating_threshold)
                                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // test data: either read from a file (optional), or split off the training data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_data = double.IsNaN(rating_threshold)
                                    ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                    : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf              = test_users_file;
                var rif              = candidate_items_file;
                test_users_file      = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um       = user_mapping;
                var im       = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                {
                    test_data = test_data.Transpose();
                }
            }

            // hand the (possibly split/transposed) training data to every item recommender
            for (int i = 0; i < recommenders.Count; i++)
            {
                if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
                {
                    ((ItemRecommender)recommenders[i]).Feedback = training_data;
                }
            }

            // test users
            if (test_users_file != null)
            {
                test_users = user_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray());
            }
            else
            {
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;
            }

            // if necessary, perform user sampling (without replacement)
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet<int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    // System.Random.Next(max) returns a value in [0, max), so pass Count (not Count - 1);
                    // otherwise the last remaining user could never be drawn.
                    // NOTE(review): assumes MyMediaLite.Random keeps System.Random.Next semantics - confirm.
                    int random_index  = MyMediaLite.Random.GetInstance().Next(old_test_users.Count);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
            {
                candidate_items = item_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray());
            }
            else if (all_items)
            {
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();
            }

            // an explicit candidate list takes precedence over the mode flags
            if (candidate_items != null)
            {
                eval_item_mode = CandidateItems.EXPLICIT;
            }
            else if (in_training_items)
            {
                eval_item_mode = CandidateItems.TRAINING;
            }
            else if (in_test_items)
            {
                eval_item_mode = CandidateItems.TEST;
            }
            else if (overlap_items)
            {
                eval_item_mode = CandidateItems.OVERLAP;
            }
            else
            {
                eval_item_mode = CandidateItems.UNION;
            }
        });

        // save files: one "<row> <column>" line per entry of the user matrix, using internal IDs
        List<string> linesToWrite = new List<string>();
        for (int i = 0; i < training_data.UserMatrix.NumberOfRows; i++)
        {
            IList<int> columns = training_data.UserMatrix.GetEntriesByRow(i);
            for (int j = 0; j < columns.Count; j++)
            {
                linesToWrite.Add(i.ToString() + " " + columns[j].ToString());
            }
        }
        System.IO.File.WriteAllLines("training.data", linesToWrite);

        // test data is optional (see above), so only dump it when it was actually loaded
        if (test_data != null)
        {
            linesToWrite = new List<string>();
            for (int i = 0; i < test_data.UserMatrix.NumberOfRows; i++)
            {
                IList<int> columns = test_data.UserMatrix.GetEntriesByRow(i);
                for (int j = 0; j < columns.Count; j++)
                {
                    linesToWrite.Add(i.ToString() + " " + columns[j].ToString());
                }
            }
            System.IO.File.WriteAllLines("test.data", linesToWrite);
        }

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
示例#4
0
    /// <summary>
    /// Loads training data, optional user/item attributes, relation data for relation-aware
    /// recommenders, optional user groups and test data; then applies the "GroupsAsUsers"
    /// conversion and the user-prediction transposition if requested, hands the training data
    /// to the recommender, and determines test users, candidate items and the evaluation item mode.
    /// </summary>
    /// <remarks>
    /// Loading time and memory usage are written to standard error. <c>training_file</c> and
    /// <c>test_file</c> are overwritten with their paths combined with <c>data_dir</c>.
    /// Test data is optional and may remain null.
    /// </remarks>
    static void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            // training data; a rating threshold (if set) selects the threshold-based reader
            training_file = Path.Combine(data_dir, training_file);
            training_data = double.IsNaN(rating_threshold)
                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // user attributes
            if (user_attributes_file != null)
                user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            if (recommender is IUserAttributeAwareRecommender)
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

            // item attributes
            if (item_attributes_file != null)
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            if (recommender is IItemAttributeAwareRecommender)
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
            }

            // user groups
            if (user_groups_file != null)
            {
                group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
                user_groups = group_to_user.NonEmptyRowIDs;
                Console.WriteLine("{0} user groups", user_groups.Count);
            }

            // test data: either read from a file (optional), or split off the training data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_file = Path.Combine(data_dir, test_file);
                    test_data = double.IsNaN(rating_threshold)
                        ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                        : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (group_method == "GroupsAsUsers")
            {
                Console.WriteLine("group recommendation strategy: {0}", group_method);
                // TODO verify what is going on here

                // transform groups to users: every group additionally receives the training feedback of its members
                // (the original per-user feedback stays in training_data)
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
                            training_data.Add(group_id, item_id);

                // transform groups to users: the test data is replaced by group-level feedback only.
                // Test data is optional, so skip the conversion when none was loaded.
                if (test_data != null)
                {
                    var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                    foreach (int group_id in group_to_user.NonEmptyRowIDs)
                        foreach (int user_id in group_to_user[group_id])
                            foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
                                test_data_group.Add(group_id, item_id);

                    test_data = test_data_group;
                }

                group_method = null; // deactivate s.t. the normal eval routines are used
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf = test_users_file;
                var rif = candidate_items_file;
                test_users_file = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um = user_mapping;
                var im = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                    test_data = test_data.Transpose();
            }

            if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
                ((ItemRecommender)recommender).Feedback = training_data;

            // test users
            if (test_users_file != null)
                test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
            else
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

            // if necessary, perform user sampling (without replacement)
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet<int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    // System.Random.Next(max) returns a value in [0, max), so pass Count (not Count - 1);
                    // otherwise the last remaining user could never be drawn.
                    // NOTE(review): assumes MyMediaLite.Util.Random keeps System.Random.Next semantics - confirm.
                    int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
                candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
            else if (all_items)
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

            // an explicit candidate list takes precedence over the mode flags
            if (candidate_items != null)
                eval_item_mode = CandidateItems.EXPLICIT;
            else if (in_training_items)
                eval_item_mode = CandidateItems.TRAINING;
            else if (in_test_items)
                eval_item_mode = CandidateItems.TEST;
            else if (overlap_items)
                eval_item_mode = CandidateItems.OVERLAP;
            else
                eval_item_mode = CandidateItems.UNION;
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }