/// <summary>
/// Splits the target-domain ratings of <paramref name="container"/> into <c>Train</c> and <c>Test</c>.
/// </summary>
/// <param name="container">Data container; only ratings with <c>Domain.IsTarget == true</c> are used.</param>
/// <param name="testPortion">Fraction of the target-domain ratings that goes to <c>Test</c>; the remainder goes to <c>Train</c>.</param>
public CrossDomainSimpleSplitter(CrossDomainDataContainer container, float testPortion)
        {
            // Materialize the shuffled ratings exactly once. The sequence is consumed several times
            // below (count, Take, Skip) and again whenever Train/Test are enumerated later; if
            // Shuffle() is lazily evaluated (cannot be confirmed from here), each enumeration could
            // yield a different order and Train/Test would no longer be a disjoint partition.
            var targetRatings = container.Ratings.Where(r => r.Domain.IsTarget == true).Shuffle().ToList();
            int trainCount    = (int)Math.Round(targetRatings.Count * (1 - testPortion));

            // Disabled alternative: also append the non-target ratings to Train via
            // .Concat(container.Ratings.Where(r => r.Domain.IsTarget == false))
            Train = targetRatings.Take(trainCount);
            Test  = targetRatings.Skip(trainCount);
        }
Example #2
0
        /// <summary>
        /// Loads the book, music, dvd and video rating files into one container, keeps the users that
        /// have between 1 and 20 ratings in each of more than three domains, and writes the music
        /// ratings of those users to "music_selected4.csv".
        /// </summary>
        public void CreateDatasetsFromOriginalDataset()
        {
            // step 1: dataset
            var csvConfig = new CsvConfiguration();
            csvConfig.Delimiter = ",";
            csvConfig.HasHeaderRecord = true;

            var container = new CrossDomainDataContainer();

            var bookDomain = new Domain("book");
            var musicDomain = new Domain("music");
            var dvdDomain = new Domain("dvd");
            var videoDomain = new Domain("video");

            // All readers are created first and then loaded in the order book, music, dvd, video.
            var readers = new[]
            {
                new CsvReader(Paths.AmazonAllBookRatings, csvConfig, bookDomain),
                new CsvReader(Paths.AmazonAllMusicRatings, csvConfig, musicDomain),
                new CsvReader(Paths.AmazonAllDvdRatings, csvConfig, dvdDomain),
                new CsvReader(Paths.AmazonAllVideoRatings, csvConfig, videoDomain)
            };

            foreach (var reader in readers)
            {
                reader.LoadData(container);
            }

            // Keep users whose per-domain rating counts all lie in [1, 20] and who have ratings in
            // more than three distinct domains.
            var selectedUsers = container.Users.Values.Where(user =>
            {
                var perDomainCounts = user.Ratings
                    .GroupBy(rating => rating.Domain)
                    .Select(group => group.Count())
                    .ToList();

                bool allWithinRange = !perDomainCounts.Any(count => count < 1 || count > 20);
                return allWithinRange && perDomainCounts.Count > 3;
            });

            // Only the music ratings of the selected users are exported, one line per rating.
            var musicRatings = selectedUsers.SelectMany(
                user => user.Ratings.Where(rating => rating.Domain == musicDomain));
            var outputLines = musicRatings.Select(rating => rating.ToString());

            Console.WriteLine("Writing...");
            var headerLine = new[] { "UserId,ItemId,Rating" };

            // Output file variants produced with this method so far:
            // selected1: only music between 5 to 20
            // selected2: only music between 1 to 20
            // selected3: only music between 2 to 20
            // selected4: all domains with ratings between 1 to 20
            File.WriteAllLines("music_selected4.csv", headerLine.Concat(outputLines));
        }
 /// <summary>
 /// Builds the split from the flags already present on the ratings: target-domain ratings with
 /// <c>IsTest == false</c> form <c>Train</c>, those with <c>IsTest == true</c> form <c>Test</c>.
 /// </summary>
 /// <param name="container">Data container whose ratings are partitioned.</param>
 public CrossDomainSimpleSplitter(CrossDomainDataContainer container)
 {
     Train = from rating in container.Ratings
             where rating.IsTest == false && rating.Domain.IsTarget == true
             select rating;

     Test = from rating in container.Ratings
            where rating.IsTest == true && rating.Domain.IsTarget == true
            select rating;
 }
Example #4
0
        /// <summary>
        /// Loads the book train/test files plus the music, dvd and video rating files into one
        /// container and writes the container's histogram to <c>Paths.AmazonProcessedPath</c>.
        /// </summary>
        public void ReportStatistics()
        {
            // step 1: dataset
            var csvConfig = new CsvConfiguration();
            csvConfig.Delimiter = ",";
            csvConfig.HasHeaderRecord = true;

            var container = new CrossDomainDataContainer();

            // "book" is created with the extra 'true' argument, unlike the other domains.
            var bookDomain = new Domain("book", true);
            var musicDomain = new Domain("music");
            var dvdDomain = new Domain("dvd");
            var videoDomain = new Domain("video");

            // Readers are all created before any of them is loaded; load order matches the original:
            // book train, book test, music, dvd, video.
            var readers = new[]
            {
                new CsvReader(Paths.AmazonBooksTrain75, csvConfig, bookDomain),
                new CsvReader(Paths.AmazonBooksTest25, csvConfig, bookDomain, true),
                new CsvReader(Paths.AmazonMusicRatings, csvConfig, musicDomain),
                new CsvReader(Paths.AmazonDvdRatings, csvConfig, dvdDomain),
                new CsvReader(Paths.AmazonVideoRatings, csvConfig, videoDomain)
            };

            foreach (var reader in readers)
            {
                reader.LoadData(container);
            }

            container.WriteHistogram(Paths.AmazonProcessedPath);
        }
Example #5
0
        /// <summary>
        /// Runs a LibFM train/test experiment on "books_selected4.csv" (target domain) with the music
        /// ratings loaded as a second domain, once per value in the auxiliary-rating list, and prints
        /// RMSE, MAE and elapsed milliseconds for each run.
        /// </summary>
        public void TestNewDataset()
        {
            // step 1: dataset
            var config = new CsvConfiguration()
            {
                Delimiter = ",",
                HasHeaderRecord = true
            };

            var container = new CrossDomainDataContainer();

            var bookDomain = new Domain("book", true);
            var musicDomain = new Domain("music");
            var dvdDomain = new Domain("dvd");
            var videoDomain = new Domain("video");

            var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
            //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
            //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
            var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
            // The dvd/video readers are still constructed (as before) but their loads are disabled below.
            var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
            var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

            bookReader.LoadData(container);
            //trainReader.LoadData(container);
            //testReader.LoadData(container);
            musicReader.LoadData(container);
            //dvdReader.LoadData(container);
            //videoReader.LoadData(container);

            container.PrintStatistics();
            //musicDomain.CacheUserData();

            // The same splitter instance (25% test portion) is reused for every run.
            var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
            //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

            var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };

            var rmse = new List<string>();
            var mae = new List<string>();
            var durations = new List<string>();

            foreach (var num in numAuxRatings)
            {
                // Stopwatch is monotonic and high-resolution; DateTime.Now subtraction is affected by
                // wall-clock adjustments and has coarse granularity.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // step 2: recommender
                LibFmTrainTester recommender;

                if (num == 0)
                {
                    // num == 0: no feature builder is passed to the recommender.
                    recommender = new LibFmTrainTester(experimentId: num.ToString());
                }
                else
                {
                    var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
                    recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
                }

                // step3: evaluation
                var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
                var ep = new EvaluationPipeline<ItemRating>(ctx);
                ep.Evaluators.Add(new RMSE());
                ep.Evaluators.Add(new MAE());
                ep.Run();

                // RMSE is read from the recommender itself, MAE from the evaluation context.
                rmse.Add(recommender.RMSE.ToString());
                mae.Add(ctx["MAE"].ToString());

                stopwatch.Stop();
                durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
            }

            Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");
            for (int i = 0; i < numAuxRatings.Count; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
            }
        }
Example #6
0
        /// <summary>
        /// Runs one LibFM train/test evaluation on the two test-domain files: domain1 supplies the
        /// train and test ratings, domain2 is loaded as a second domain. RMSE and MAE evaluators are
        /// attached to the pipeline.
        /// </summary>
        public void TestCrossDomain()
        {
            // step 1: dataset
            var csvConfig = new CsvConfiguration();
            csvConfig.Delimiter = ",";
            csvConfig.HasHeaderRecord = true;

            var container = new CrossDomainDataContainer();

            var targetDomain = new Domain("domain1", true);
            var auxDomain = new Domain("domain2");

            // Readers are created up front and then loaded in the order train, aux, test.
            var domain1Train = new CsvReader(Paths.TestDomain1Train, csvConfig, targetDomain);
            var domain2All = new CsvReader(Paths.TestDomain2, csvConfig, auxDomain);
            var domain1Test = new CsvReader(Paths.TestDomain1Test, csvConfig, targetDomain, true);

            foreach (var reader in new[] { domain1Train, domain2All, domain1Test })
            {
                reader.LoadData(container);
            }

            var dataset = new ItemRatingDataset(container);

            var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain);

            // step 2: recommender
            var recommender = new LibFmTrainTester(featureBuilder: featureBuilder);

            // step3: evaluation
            var context = new EvalutationContext<ItemRating>(recommender, dataset);
            var pipeline = new EvaluationPipeline<ItemRating>(context);
            pipeline.Evaluators.Add(new RMSE());
            pipeline.Evaluators.Add(new MAE());

            pipeline.Run();

            // Debug aid (disabled): dump the feature builder's ID mappings.
            // featureBuilder.Mapper.OriginalIDs.ToList().ForEach(Console.WriteLine);
            // featureBuilder.Mapper.InternalIDs.ToList().ForEach(Console.WriteLine);
        }
Example #7
0
        /// <summary>
        /// Prints five rating counts for the user with id "2305": book ratings, music ratings,
        /// non-test ratings, non-test book ratings and test book ratings.
        /// </summary>
        /// <param name="container">Container expected to hold exactly one user with id "2305".</param>
        public void TestContainer(CrossDomainDataContainer container)
        {
            // Single() throws unless exactly one user matches.
            var user = container.Users.Values.Single(u => u.Id == "2305");
            var ratings = user.Ratings;

            // The trailing numbers are carried over from the original annotations; presumably they
            // are the counts expected for the reference dataset (verify against the data).
            Console.WriteLine(ratings.Count(r => r.Domain.Id == "book"));                        // 33
            Console.WriteLine(ratings.Count(r => r.Domain.Id == "music"));                       // 3
            Console.WriteLine(ratings.Count(r => r.IsTest == false));                            // 29
            Console.WriteLine(ratings.Count(r => r.IsTest == false && r.Domain.Id == "book"));   // 26
            Console.WriteLine(ratings.Count(r => r.IsTest == true && r.Domain.Id == "book"));    // 7
        }
Example #8
0
        /// <summary>
        /// Runs ten LibFM train/test experiments on "books_selected4.csv" (target domain) with music
        /// ratings as a second domain, calling <c>musicDomain.ActivateData(0.1f)</c> before each run,
        /// and prints RMSE, MAE and elapsed milliseconds per run.
        /// </summary>
        public void TestAuxDataSize()
        {
            // Number of experiment runs; shared by the experiment loop and the report loop.
            const int numRuns = 10;

            // step 1: dataset
            var config = new CsvConfiguration()
            {
                Delimiter = ",",
                HasHeaderRecord = true
            };

            var container = new CrossDomainDataContainer();

            var bookDomain = new Domain("book", true);
            var musicDomain = new Domain("music");
            var dvdDomain = new Domain("dvd");
            var videoDomain = new Domain("video");

            var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
            //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
            //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
            var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
            // The dvd/video readers are still constructed (as before) but their loads are disabled below.
            var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
            var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

            bookReader.LoadData(container);
            //trainReader.LoadData(container);
            //testReader.LoadData(container);
            musicReader.LoadData(container);
            //dvdReader.LoadData(container);
            //videoReader.LoadData(container);

            container.PrintStatistics();

            // The same splitter instance (25% test portion) is reused for every run.
            var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
            //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

            var rmse = new List<string>();
            var mae = new List<string>();
            var durations = new List<string>();

            for (int i = 0; i < numRuns; i++)
            {
                // Stopwatch is monotonic and high-resolution; DateTime.Now subtraction is affected by
                // wall-clock adjustments and has coarse granularity.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // NOTE(review): presumably each call makes a further 10% of the music data
                // available, so run i uses (i + 1) * 10% of it - confirm against Domain.ActivateData.
                musicDomain.ActivateData(0.1f);

                // step 2: recommender
                var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
                var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

                // step3: evaluation
                var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
                var ep = new EvaluationPipeline<ItemRating>(ctx);
                ep.Evaluators.Add(new RMSE());
                ep.Evaluators.Add(new MAE());
                ep.Run();

                // RMSE is read from the recommender itself, MAE from the evaluation context.
                rmse.Add(recommender.RMSE.ToString());
                mae.Add(ctx["MAE"].ToString());

                stopwatch.Stop();
                durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
            }

            // The first column printed below is the run index, although the header says "NumAuxRatings".
            Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");
            for (int i = 0; i < numRuns; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
            }
        }
Example #9
0
        /// <summary>
        /// Runs LibFM train/test experiments with "video" as the target domain (pre-split 75/25
        /// train/test files) and music, dvd and book ratings as additional domains, once per value in
        /// the auxiliary-rating list, and prints RMSE, MAE and elapsed milliseconds for each run.
        /// </summary>
        public void TestAmazonCrossDomainVideo()
        {
            // step 1: dataset
            var config = new CsvConfiguration()
            {
                Delimiter = ",",
                HasHeaderRecord = true
            };

            var container = new CrossDomainDataContainer();

            var bookDomain = new Domain("book");
            var musicDomain = new Domain("music");
            var dvdDomain = new Domain("dvd");
            var videoDomain = new Domain("video", true);

            var trainReader = new CsvReader(Paths.AmazonVideoTrain75, config, videoDomain);
            var testReader = new CsvReader(Paths.AmazonVideoTest25, config, videoDomain, true);
            var musicReader = new CsvReader(Paths.AmazonMusicRatings, config, musicDomain);
            var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
            var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);

            //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

            trainReader.LoadData(container);
            testReader.LoadData(container);
            musicReader.LoadData(container);
            dvdReader.LoadData(container);
            bookReader.LoadData(container);
            //tempReader.LoadData(container);
            //container.ShuffleDomains();
            container.PrintStatistics();
            //musicDomain.CacheUserData();

            // This splitter overload takes no test portion; the same instance is reused for every run.
            var splitter = new CrossDomainSimpleSplitter(container);
            //var splitter = new RatingSimpleSplitter(container);

            var numAuxRatings = new[] { 0, 1, 2, 3 };

            var rmse = new List<string>();
            var mae = new List<string>();
            var durations = new List<string>();

            foreach (var num in numAuxRatings)
            {
                // Stopwatch is monotonic and high-resolution; DateTime.Now subtraction is affected by
                // wall-clock adjustments and has coarse granularity.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // step 2: recommender
                ITrainTester<ItemRating> recommender;

                if (num == 0)
                {
                    // num == 0: no feature builder is passed to the recommender.
                    recommender = new LibFmTrainTester(experimentId: num.ToString());
                }
                else
                {
                    var featureBuilder = new CrossDomainLibFmFeatureBuilder(videoDomain, num);
                    //featureBuilder.LoadCachedUserData(_musicUsersPath);
                    recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
                }

                // step3: evaluation
                var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
                var ep = new EvaluationPipeline<ItemRating>(ctx);
                ep.Evaluators.Add(new RMSE());
                ep.Evaluators.Add(new MAE());
                ep.Run();

                // Both metrics are read back from the evaluation context.
                rmse.Add(ctx["RMSE"].ToString());
                mae.Add(ctx["MAE"].ToString());

                stopwatch.Stop();
                durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
            }

            Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");
            for (int i = 0; i < numAuxRatings.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
            }
        }