/// <summary>
/// Loads the MovieLens 1M train (75%) and test (25%) CSV files into one container,
/// evaluates a LibFM-backed recommender with RMSE, and prints the score together
/// with the elapsed time in milliseconds.
/// </summary>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = "::", HasHeaderRecord = true };

    // load data
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    // NOTE(review): the trailing `true` presumably flags this reader as test data - confirm against CsvReader.
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);
    var container = new DataContainer();

    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is monotonic and high-resolution; DateTime.Now follows the wall clock
    // and can jump (clock adjustments, DST), which skews measured durations.
    // Data loading is intentionally excluded from the measurement.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Loads the full Amazon book, music, dvd and video rating files into a cross-domain
/// container, keeps the users whose per-domain rating counts all lie in [1, 20] and who
/// have ratings in more than three domains, and writes those users' music ratings to
/// "music_selected4.csv" below a "UserId,ItemId,Rating" header line.
/// </summary>
public void CreateDatasetsFromOriginalDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book");
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader(Paths.AmazonAllBookRatings, config, bookDomain);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var output = container.Users.Values
        .Where(u =>
        {
            // Materialize the per-domain counts once. The lazy GroupBy/Select query was
            // previously enumerated twice per user (All + Count), regrouping each time.
            var counts = u.Ratings.GroupBy(r => r.Domain).Select(g => g.Count()).ToList();

            // Cheap domain-count check first; the predicate has no side effects, so the
            // result is the same as evaluating the range check first.
            return counts.Count > 3 && counts.All(c => c >= 1 && c <= 20);
        })
        //.Select(u => new { UserId = u.Id, Counts = u.Ratings.GroupBy(r => r.Domain.Id).Select(g => g.Count().ToString()).Aggregate((a,b) => a + " " + b) })
        //.Select(a => a.UserId + "," + a.Counts);
        .SelectMany(u => u.Ratings.Where(r => r.Domain == musicDomain))
        //.SelectMany(u => u.Ratings.GroupBy(r => r.Item.Id).Select(g => g.Take(1).Single()))
        .Select(r => r.ToString());

    Console.WriteLine("Writing...");

    var header = new string[] { "UserId,ItemId,Rating" };

    // selected1: only music between 5 to 20
    // selected2: only music between 1 to 20
    // selected3: only music between 2 to 20
    // selected4: all domains with ratings between 1 to 20
    // The query is deferred, so filtering actually runs while the file is being written.
    File.WriteAllLines("music_selected4.csv", header.Concat(output));

    //container.PrintStatistics();
}
/// <summary>
/// Reads the Amazon book train/test splits plus the music, dvd and video rating files
/// into one cross-domain container and asks the container to write its histogram to
/// <c>Paths.AmazonProcessedPath</c>.
/// </summary>
public void ReportStatistics()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var data = new CrossDomainDataContainer();

    // NOTE(review): the `true` flag on the book domain presumably marks it as the target domain - confirm against Domain.
    var books = new Domain("book", true);
    var music = new Domain("music");
    var dvds = new Domain("dvd");
    var videos = new Domain("video");

    // Readers are created and then loaded in the same order as before:
    // book train, book test, music, dvd, video.
    var readers = new[]
    {
        new CsvReader(Paths.AmazonBooksTrain75, csvConfig, books),
        new CsvReader(Paths.AmazonBooksTest25, csvConfig, books, true),
        new CsvReader(Paths.AmazonMusicRatings, csvConfig, music),
        new CsvReader(Paths.AmazonDvdRatings, csvConfig, dvds),
        new CsvReader(Paths.AmazonVideoRatings, csvConfig, videos)
    };

    foreach (var reader in readers)
    {
        reader.LoadData(data);
    }

    data.WriteHistogram(Paths.AmazonProcessedPath);
}
/// <summary>
/// Runs a LibFM experiment on "books_selected4.csv" (target domain) with Amazon music
/// ratings as auxiliary data, once for each configured number of auxiliary ratings,
/// and prints a tab-separated table of RMSE, MAE and duration per run.
/// </summary>
public void TestNewDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    // The dvd/video readers are constructed but their loading is currently switched off below.
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");
    //return;
    //var splitter = new RatingSimpleSplitter(container, 0.25f);

    var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic; DateTime.Now follows the wall clock and can jump mid-run.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        LibFmTrainTester recommender;
        CrossDomainLibFmFeatureBuilder featureBuilder = null;

        if (num == 0)
        {
            // Zero auxiliary ratings: run without a cross-domain feature builder.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        // RMSE is read from the recommender itself here, MAE from the evaluation context.
        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // List<T>.Count property instead of the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Small cross-domain smoke run: loads the domain1 train file, the domain2 file and the
/// domain1 test file into one container, then evaluates a LibFM recommender that uses a
/// cross-domain feature builder, with RMSE and MAE evaluators attached.
/// </summary>
public void TestCrossDomain()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var data = new CrossDomainDataContainer();

    var targetDomain = new Domain("domain1", true);
    var auxDomain = new Domain("domain2");

    // Load order is kept as train -> auxiliary -> test.
    var readers = new[]
    {
        new CsvReader(Paths.TestDomain1Train, csvConfig, targetDomain),
        new CsvReader(Paths.TestDomain2, csvConfig, auxDomain),
        new CsvReader(Paths.TestDomain1Test, csvConfig, targetDomain, true)
    };

    foreach (var reader in readers)
    {
        reader.LoadData(data);
    }

    var ratingDataset = new ItemRatingDataset(data);
    var crossDomainFeatures = new CrossDomainLibFmFeatureBuilder(targetDomain);

    // step 2: recommender
    var trainTester = new LibFmTrainTester(featureBuilder: crossDomainFeatures);

    // step3: evaluation
    var context = new EvalutationContext<ItemRating>(trainTester, ratingDataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();

    // crossDomainFeatures.Mapper.OriginalIDs.ToList().ForEach(Console.WriteLine);
    // crossDomainFeatures.Mapper.InternalIDs.ToList().ForEach(Console.WriteLine);
}
/// <summary>
/// Repeats a cross-domain LibFM experiment (books as target, music as auxiliary data)
/// ten times, calling <c>musicDomain.ActivateData(0.1f)</c> before each run, and prints
/// a tab-separated table of RMSE, MAE and duration per run.
/// </summary>
public void TestAuxDataSize()
{
    // Number of experiment runs; shared by the experiment loop and the report loop.
    const int runCount = 10;

    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    // The dvd/video readers are constructed but their loading is currently switched off below.
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    for (int i = 0; i < runCount; i++)
    {
        // Stopwatch is monotonic; DateTime.Now follows the wall clock and can jump mid-run.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // NOTE(review): presumably each call activates a further 10% of the music data - confirm against Domain.ActivateData.
        musicDomain.ActivateData(0.1f);

        // step 2: recommender
        var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
        var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        // RMSE is read from the recommender itself here, MAE from the evaluation context.
        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    // The first column holds the run index; the header text is left unchanged.
    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < runCount; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Single-domain run on the Amazon books split: the train and test CSV files are loaded
/// into separate containers, wrapped in an <c>ItemRatingDataset</c>, and a default
/// <c>LibFmTrainTester</c> is evaluated with RMSE and MAE evaluators.
/// </summary>
public void TestAmazonDatasetSingleNewModel()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };

    var trainData = new DataContainer();
    var testData = new DataContainer();

    var trainSource = new CsvReader(Paths.AmazonBooksTrain75, csvConfig);
    var testSource = new CsvReader(Paths.AmazonBooksTest25, csvConfig);

    trainSource.LoadData(trainData);
    testSource.LoadData(testData);

    var ratingDataset = new ItemRatingDataset(trainData, testData);
    //var featureBuilder = new LibFmFeatureBuilder();

    // step 2: recommender
    var trainTester = new LibFmTrainTester();

    // step3: evaluation
    var context = new EvalutationContext<ItemRating>(trainTester, ratingDataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Single-domain run on the Amazon books split using the generic reader/dataset API:
/// a <c>MediaLiteRatingPredictor</c> wrapping <c>BiasedMatrixFactorization</c> is
/// evaluated with RMSE and MAE evaluators.
/// </summary>
public void TestAmazonDatasetSingle()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };

    var trainSource = new CsvReader<ItemRating>(Paths.AmazonBooksTrain75, csvConfig, new ItemRatingMap());
    var testSource = new CsvReader<ItemRating>(Paths.AmazonBooksTest25, csvConfig, new ItemRatingMap());
    var ratingDataset = new Dataset<ItemRating>(trainSource, testSource);

    // step 2: recommender
    var predictor = new MediaLiteRatingPredictor(new BiasedMatrixFactorization());

    // step3: evaluation
    var context = new EvalutationContext<ItemRating>(predictor, ratingDataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Cross-domain experiment with Amazon video ratings as the target domain and music, dvd
/// and book ratings as auxiliary domains. Runs LibFM once for each configured number of
/// auxiliary ratings and prints a tab-separated table of RMSE, MAE and duration per run.
/// </summary>
public void TestAmazonCrossDomainVideo()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book");
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video", true);

    var trainReader = new CsvReader(Paths.AmazonVideoTrain75, config, videoDomain);
    var testReader = new CsvReader(Paths.AmazonVideoTest25, config, videoDomain, true);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);
    //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

    trainReader.LoadData(container);
    testReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    bookReader.LoadData(container);
    //tempReader.LoadData(container);

    //container.ShuffleDomains();
    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container);
    //var splitter = new RatingSimpleSplitter(container);

    var numAuxRatings = new[] { 0, 1, 2, 3 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic; DateTime.Now follows the wall clock and can jump mid-run.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        ITrainTester<ItemRating> recommender;
        CrossDomainLibFmFeatureBuilder featureBuilder = null;

        if (num == 0)
        {
            // Zero auxiliary ratings: run without a cross-domain feature builder.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            featureBuilder = new CrossDomainLibFmFeatureBuilder(videoDomain, num);
            //featureBuilder.LoadCachedUserData(_musicUsersPath);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(ctx["RMSE"].ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // Array.Length instead of the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Length; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Amazon book, music, dvd and video rating files into one plain container
/// (no domain objects are attached), evaluates a default <c>LibFmTrainTester</c> on a
/// split created with <c>RatingSimpleSplitter(container, 0.25f)</c>, and prints RMSE
/// and the elapsed milliseconds.
/// </summary>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is monotonic and high-resolution; DateTime.Now follows the wall clock
    // and can jump, which skews measured durations. Loading and splitter construction
    // are intentionally excluded from the measurement.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var recommender = new LibFmTrainTester();
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());

    // step3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.Elapsed.TotalMilliseconds);
}