public void TestReadIgnoreLine()
        {
            // Input: one leading "# first line" header followed by seven comma-separated
            // rows whose third field is 5, 5, 5, 5, 4, 3, 3.
            // The trailing `true` argument is presumably the "ignore first line" switch
            // (see the test name) -- confirm against ItemDataRatingThreshold.Read.
            string input = @"# first line
5951,50,5
5951,223,5
5951,260,5
5951,293,5
5951,356,4
5951,364,3
5951,457,3
";

            // Each expected count equals the number of rows whose third field is
            // at least the threshold passed as the second argument.

            // threshold 0: every row is kept
            IDataSet keep_everything = ItemDataRatingThreshold.Read(new StringReader(input), 0, null, null, true);
            Assert.AreEqual(7, keep_everything.Count);

            // threshold 5: only the four rows with a 5
            IDataSet fives_only = ItemDataRatingThreshold.Read(new StringReader(input), 5, null, null, true);
            Assert.AreEqual(4, fives_only.Count);

            // threshold 4: the four 5s plus the single 4
            IDataSet four_and_up = ItemDataRatingThreshold.Read(new StringReader(input), 4, null, null, true);
            Assert.AreEqual(5, four_and_up.Count);

            // threshold 3: all seven rows again
            IDataSet three_and_up = ItemDataRatingThreshold.Read(new StringReader(input), 3, null, null, true);
            Assert.AreEqual(7, three_and_up.Count);
        }
        public void TestRead()
        {
            // Input: seven comma-separated rows whose third field is 5, 5, 5, 5, 4, 3, 3
            // (no header line in this variant).
            string input = @"5951,50,5
5951,223,5
5951,260,5
5951,293,5
5951,356,4
5951,364,3
5951,457,3
";

            // Each expected count equals the number of rows whose third field is
            // at least the threshold passed as the second argument.

            // threshold 0: every row is kept
            IDataSet keep_everything = ItemDataRatingThreshold.Read(new StringReader(input), 0);
            Assert.AreEqual(7, keep_everything.Count);

            // threshold 5 (given as a float literal): only the four rows with a 5
            IDataSet fives_only = ItemDataRatingThreshold.Read(new StringReader(input), 5.0f);
            Assert.AreEqual(4, fives_only.Count);

            // threshold 4: the four 5s plus the single 4
            IDataSet four_and_up = ItemDataRatingThreshold.Read(new StringReader(input), 4);
            Assert.AreEqual(5, four_and_up.Count);

            // threshold 3: all seven rows again
            IDataSet three_and_up = ItemDataRatingThreshold.Read(new StringReader(input), 3);
            Assert.AreEqual(7, three_and_up.Count);
        }
コード例 #3
0
    /// <summary>
    /// Loads the training and (optional) test feedback, prepares test users, candidate items
    /// and the evaluation item mode, then dumps the loaded user/item pairs to
    /// "training.data" and "test.data" in the current working directory.
    /// </summary>
    /// <remarks>
    /// The loading phase runs inside <c>Wrap.MeasureTime</c>; the elapsed time and the memory
    /// usage are written to standard error at the end. The file dump is not part of the
    /// measured time. "test.data" is only written when test data is available.
    /// </remarks>
    protected override void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            base.LoadData();

            // training data: a NaN threshold selects the plain reader, otherwise the thresholded one
            training_data = double.IsNaN(rating_threshold)
                                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // test data: either read from a separate file, or split off from the training data
            if (test_ratio == 0)
            {
                // test_data is left untouched here when no test file was given
                if (test_file != null)
                {
                    test_data = double.IsNaN(rating_threshold)
                                                ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                                : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit <PosOnlyFeedback <SparseBooleanMatrix> >(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf              = test_users_file;
                var rif              = candidate_items_file;
                test_users_file      = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um       = user_mapping;
                var im       = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training data
                training_data = training_data.Transpose();

                // transpose test data (may be absent)
                if (test_data != null)
                {
                    test_data = test_data.Transpose();
                }
            }

            // hand the (possibly split/transposed) training data to every item recommender
            for (int i = 0; i < recommenders.Count; i++)
            {
                if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
                {
                    ((ItemRecommender)recommenders[i]).Feedback = training_data;
                }
            }

            // test users: explicit list from file, otherwise all users of the test (or training) data
            if (test_users_file != null)
            {
                test_users = user_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray());
            }
            else
            {
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;
            }

            // if necessary, perform user sampling (without replacement)
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet <int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    // Next(maxValue) excludes maxValue (assuming MyMediaLite.Random follows
                    // System.Random -- confirm), so pass Count, not Count - 1; otherwise the
                    // last remaining element could never be drawn.
                    int random_index  = MyMediaLite.Random.GetInstance().Next(old_test_users.Count);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items: explicit list from file, or every internal item ID up to the maximum
            if (candidate_items_file != null)
            {
                candidate_items = item_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray());
            }
            else if (all_items)
            {
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();
            }

            // evaluation item mode: an explicit candidate list wins, then the flags in this order
            if (candidate_items != null)
            {
                eval_item_mode = CandidateItems.EXPLICIT;
            }
            else if (in_training_items)
            {
                eval_item_mode = CandidateItems.TRAINING;
            }
            else if (in_test_items)
            {
                eval_item_mode = CandidateItems.TEST;
            }
            else if (overlap_items)
            {
                eval_item_mode = CandidateItems.OVERLAP;
            }
            else
            {
                eval_item_mode = CandidateItems.UNION;
            }
        });

        // Save files: one "<row> <column>" line per entry of the user matrix.
        List <string> linesToWrite = new List <string>();
        for (int i = 0; i < training_data.UserMatrix.NumberOfRows; i++)
        {
            IList <int> columns = training_data.UserMatrix.GetEntriesByRow(i);
            for (int j = 0; j < columns.Count; j++)
            {
                linesToWrite.Add(i + " " + columns[j]);
            }
        }
        System.IO.File.WriteAllLines("training.data", linesToWrite);

        // test_data stays null when neither a test file nor a split ratio was given;
        // in that case there is nothing to dump.
        if (test_data != null)
        {
            linesToWrite = new List <string>();
            for (int i = 0; i < test_data.UserMatrix.NumberOfRows; i++)
            {
                IList <int> columns = test_data.UserMatrix.GetEntriesByRow(i);
                for (int j = 0; j < columns.Count; j++)
                {
                    linesToWrite.Add(i + " " + columns[j]);
                }
            }
            System.IO.File.WriteAllLines("test.data", linesToWrite);
        }

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }