/// <summary>
/// Loads the files referenced by <c>Paths.MovieLens1MTrain75</c> and <c>Paths.MovieLens1MTest25</c>,
/// evaluates a LibFM recommender with the RMSE evaluator and prints the RMSE together with the
/// elapsed time in milliseconds.
/// </summary>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = "::", HasHeaderRecord = true };

    // load data
    // NOTE(review): the trailing 'true' presumably marks the reader as test data - confirm against CsvReader.
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);

    var container = new DataContainer();
    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted. Data loading is deliberately not timed.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Evaluates LibFM on the MovieLens train/test files once without cluster files (index 0)
/// and once for each cluster-file index 2, 4, ..., 14, then prints every RMSE followed by every MAE.
/// </summary>
public void TestMovieLensWithClusters()
{
    // Todo: instead of loading the dataset every time add a updateCluster method to the Dataset<ItemRatingWithCluster>
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int fileIndex = 0; fileIndex < 15; fileIndex += 2)
    {
        string userClusters = Paths.MovieLens1MUsersCluster + fileIndex + ".csv";
        string itemClusters = Paths.MovieLens1MItemsCluster + fileIndex + ".csv";

        // Index 0 is the run without cluster files; every other index passes both cluster files.
        bool withoutClusters = fileIndex == 0;

        MovieLensReader trainReader = withoutClusters
            ? new MovieLensReader(Paths.MovieLens1MTrain75)
            : new MovieLensReader(Paths.MovieLens1MTrain75, userClusters, itemClusters);

        MovieLensReader testReader = withoutClusters
            ? new MovieLensReader(Paths.MovieLens1MTest25)
            : new MovieLensReader(Paths.MovieLens1MTest25, userClusters, itemClusters);

        var dataset = new Dataset<MovieLensItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();
        var context = new EvalutationContext<ItemRating>(recommender, dataset);

        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Evaluates LibFM on the Epinion train/test files once without cluster files (index 0)
/// and once for each cluster-file index 2, 4, ..., 14, then prints every RMSE followed by every MAE.
/// </summary>
public void TestEpinionClusters()
{
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int fileIndex = 0; fileIndex < 15; fileIndex += 2)
    {
        string userClusters = Paths.EpinionUsersCluster + fileIndex + ".csv";
        string itemClusters = Paths.EpinionItemsCluster + fileIndex + ".csv";

        // Index 0 is the run without cluster files; every other index passes both cluster files.
        bool withoutClusters = fileIndex == 0;

        EpinionReader trainReader = withoutClusters
            ? new EpinionReader(Paths.EpinionTrain75)
            : new EpinionReader(Paths.EpinionTrain75, userClusters, itemClusters);

        EpinionReader testReader = withoutClusters
            ? new EpinionReader(Paths.EpinionTest25)
            : new EpinionReader(Paths.EpinionTest25, userClusters, itemClusters);

        var dataset = new Dataset<EpinionItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();
        var context = new EvalutationContext<ItemRating>(recommender, dataset);

        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Evaluates LibFM (RMSE and MAE) on the Amazon books train/test files,
/// passing the ".nmf.u" and ".nmf.i" cluster files to both readers.
/// </summary>
public void TestAmazonWithNmfCluster()
{
    // NOTE(review): both cluster paths are derived from Paths.AmazonBooksUsersCluster,
    // including the ".nmf.i" one - confirm this is intended.
    string userClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.u";
    string itemClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.i";

    var trainReader = new AmazonReader(Paths.AmazonBooksTrain75, userClusterPath, itemClusterPath);
    var testReader = new AmazonReader(Paths.AmazonBooksTest25, userClusterPath, itemClusterPath);
    var dataset = new Dataset<ItemRatingWithClusters>(trainReader, testReader);

    var context = new EvalutationContext<ItemRating>(new LibFmTrainTester(), dataset);

    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads the Epinion train/test files through an <c>EpinionTrustReader</c> (which also receives
/// <c>Paths.EpinionRelationsImplicit</c>), evaluates LibFM with a trust-aware feature builder and
/// prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
public void TestEpinionsTrustAware()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = " ", HasHeaderRecord = true };

    // load data
    // NOTE(review): the trailing 'true' presumably marks the reader as test data - confirm against CsvReader.
    var trainReader = new CsvReader(Paths.EpinionTrain80, config);
    var testReader = new CsvReader(Paths.EpinionTest20, config, true);

    // The trust reader takes an array, so build one directly instead of a List + ToArray().
    var readers = new IDatasetReader[] { trainReader, testReader };
    var epinionTrustReader = new EpinionTrustReader(readers, Paths.EpinionRelationsImplicit);

    var container = new DataContainer();
    epinionTrustReader.LoadData(container);

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted. Data loading is deliberately not timed.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());
    var fb = new TrustAwareLibFmFeatureBuilder(container, 4, true);
    var recommender = new LibFmTrainTester(featureBuilder: fb);

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Loads MovieLens into a single-domain cross-domain container, marks "ml0" as the target domain,
/// evaluates LibFM (RMSE and MAE) on a <c>CrossDomainSimpleSplitter</c> split and prints the RMSE
/// together with the elapsed time in milliseconds.
/// </summary>
public void TestMovieLensSingleDomain()
{
    int numDomains = 1;

    // load data
    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains);
    movieLensReader.LoadData(container);

    // set target and active domains (the returned domain is not needed here)
    container.SpecifyTargetDomain("ml0");

    container.PrintStatistics();

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

    // recommender with non-CrossDomain feature builder
    // Disabled alternative (its previously unused model object has been removed):
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization { NumIter = 50, NumFactors = 8, Regularization = 0.1f });
    var recommender = new LibFmTrainTester();

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    // Only the RMSE is printed, although the MAE evaluator also ran.
    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);

    //container.CreateClusterFiles(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.raw", Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.feat");
    //container.WriteClusters(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust");
}
/// <summary>
/// Scaffold for a clustered MovieLens experiment. With the dataset and evaluation steps
/// commented out, it currently only reads and samples the ratings file and constructs
/// a LibFM recommender for each cluster index 2, 4, ..., 14.
/// </summary>
public void TestMovieLens()
{
    // Disabled: one-off generation of the item cluster files.
    //var dataset1 = new Dataset<MovieLensItemRating>(new MovieLensReader(Paths.MovieLens1M), 0.3);
    //var c = new Clusterer(dataset1);
    //for (int i = 2; i < 15; i += 2)
    //{
    //    c.WriteCluster(Paths.MovieLens1MItemsCluster + i + ".csv", i);
    //}
    //return;

    // step 1: dataset - shuffle the file's lines (argument 1) and keep the first 50000
    var sampledLines = File.ReadAllLines(Paths.MovieLens1M)
        .Shuffle(1)
        .Take(50000)
        .ToList();

    int clusterIndex = 2;
    while (clusterIndex < 15)
    {
        //var dataset = new Dataset<MovieLensItemRating>(new MovieLensReader(Paths.MovieLens1M, Paths.MovieLens1MUsersCluster + i + ".csv", Paths.MovieLens1MItemsCluster + i + ".csv", lines), 0.3);

        // Disabled: one-off generation of the user cluster files.
        //var c = new Clusterer(dataset);
        //for (int i = 2; i < 15; i += 2)
        //{
        //    c.WriteCluster(Paths.MovieLens1MUsersCluster + i + ".csv", i);
        //}
        //return;

        // step 2: recommender
        //var recommender = new MediaLiteRatingPredictor(new BiasedMatrixFactorization());
        var recommender = new LibFmTrainTester();

        // step 3: evaluation (disabled)
        //var ep = new EvaluationPipeline<ItemRating>(new EvalutationContext<ItemRating>(recommender, dataset));
        //ep.Evaluators.Add(new RMSE());
        //ep.Run();

        clusterIndex += 2;
    }
}
/// <summary>
/// Loads playing-session data from <c>path</c>, splits it with ratio argument 0.25 and
/// runs LibFM through the evaluation pipeline with the RMSE and MAE evaluators.
/// </summary>
static void TrainTestFm()
{
    var sessionReader = new PlayingSessionReader(path);
    var musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    var splitter = new RatingSimpleSplitter(musicData, 0.25f);

    // Disabled alternative: a MediaLite most-popular item recommender.
    //var itemRecommender = new MostPopular();
    //var model = new MediaLitePosFeedbakItemRecommender(itemRecommender);
    var libFm = new LibFmTrainTester();

    var pipeline = new EvaluationPipeline<ItemRating>(
        new EvalutationContext<ItemRating>(libFm, splitter));
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads two test domains ("domain1" with its train and test files, "domain2" as an additional
/// domain) into one cross-domain container and evaluates LibFM (RMSE and MAE) with a
/// cross-domain feature builder built for "domain1".
/// </summary>
public void TestCrossDomain()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag on Domain presumably marks it as the target domain, and the
    // trailing 'true' on CsvReader presumably marks test data - confirm against those types.
    var targetDomain = new Domain("domain1", true);
    var auxDomain = new Domain("domain2");

    var trainReader = new CsvReader(Paths.TestDomain1Train, csvConfig, targetDomain);
    var auxReader = new CsvReader(Paths.TestDomain2, csvConfig, auxDomain);
    var testReader = new CsvReader(Paths.TestDomain1Test, csvConfig, targetDomain, true);

    trainReader.LoadData(container);
    auxReader.LoadData(container);
    testReader.LoadData(container);

    var dataset = new ItemRatingDataset(container);
    var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain);

    // step 2: recommender
    var recommender = new LibFmTrainTester(featureBuilder: featureBuilder);

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();

    // featureBuilder.Mapper.OriginalIDs.ToList().ForEach(Console.WriteLine);
    // featureBuilder.Mapper.InternalIDs.ToList().ForEach(Console.WriteLine);
}
/// <summary>
/// Loads playing-session data from <c>path</c>, splits it with ratio argument 0.25 and
/// runs LibFM through the evaluation pipeline with the RMSE and MAE evaluators.
/// </summary>
static void TrainTestFm()
{
    var sessionReader = new PlayingSessionReader(path);
    var musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    var splitter = new RatingSimpleSplitter(musicData, 0.25f);

    // Disabled alternative: a MediaLite most-popular item recommender.
    //var itemRecommender = new MostPopular();
    //var model = new MediaLitePosFeedbakItemRecommender(itemRecommender);
    var libFm = new LibFmTrainTester();

    var pipeline = new EvaluationPipeline<ItemRating>(
        new EvalutationContext<ItemRating>(libFm, splitter));
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads the Amazon book, music, DVD and video rating files into one plain container,
/// evaluates LibFM (RMSE and MAE) on a split with ratio argument 0.25 and prints the RMSE
/// and the elapsed time in milliseconds.
/// </summary>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted. Loading and splitter setup are not timed.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // step 2: recommender
    var recommender = new LibFmTrainTester();
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());

    // step 3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.ElapsedMilliseconds);
}
/// <summary>
/// Evaluates LibFM (RMSE) on the Epinion train/test files, with each reader wrapped in an
/// <c>ItemRatingWithRelationReader</c> that also receives the relations parsed from
/// <c>Paths.EpinionRelationsImplicit</c>.
/// </summary>
public void TestTrustWithFM()
{
    // step 1: dataset
    var config = new CsvConfiguration();
    config.Delimiter = " ";

    var trainReader = new CsvReader<ItemRating>(Paths.EpinionTrain80, config, new ItemRatingMap());
    var testReader = new CsvReader<ItemRating>(Paths.EpinionTest20, config, new ItemRatingMap());

    // The relations file is tab-separated. Strength is machine-written data, so it is parsed
    // with the invariant culture; the culture-sensitive overload would misread or reject
    // values such as "0.5" on machines whose decimal separator is a comma.
    var relations = File.ReadAllLines(Paths.EpinionRelationsImplicit).ToCsvDictionary('\t')
        .Select(i => new Relation()
        {
            UserId = i["UserId"],
            ConnectedId = i["ConnectionId"],
            ConnectionStrength = float.Parse(i["Strength"], System.Globalization.CultureInfo.InvariantCulture)
        })
        .ToList();
    //.Where(r => r.ConnectionStrength > 1F);

    var trainWithRelations = new ItemRatingWithRelationReader(trainReader, relations);
    var testWithRelations = new ItemRatingWithRelationReader(testReader, relations);

    var dataset = new Dataset<ItemRatingWithRelations>(trainWithRelations, testWithRelations);

    Console.WriteLine("Features constructed.");

    // step 2: recommender
    var recommender = new LibFmTrainTester();

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var ep = new EvaluationPipeline<ItemRating>(context);
    ep.Evaluators.Add(new RMSE());
    ep.Run();
}
/// <summary>
/// Loads the files referenced by <c>Paths.MovieLens1MTrain75</c> and <c>Paths.MovieLens1MTest25</c>,
/// evaluates a LibFM recommender with the RMSE evaluator and prints the RMSE together with the
/// elapsed time in milliseconds.
/// </summary>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = "::", HasHeaderRecord = true };

    // load data
    // NOTE(review): the trailing 'true' presumably marks the reader as test data - confirm against CsvReader.
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);

    var container = new DataContainer();
    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted. Data loading is deliberately not timed.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Loads the Amazon books train and test files into two separate containers and
/// evaluates LibFM (RMSE and MAE) on the dataset built from them.
/// </summary>
public void TestAmazonDatasetSingleNewModel()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };

    var trainData = new DataContainer();
    var testData = new DataContainer();

    var trainReader = new CsvReader(Paths.AmazonBooksTrain75, csvConfig);
    var testReader = new CsvReader(Paths.AmazonBooksTest25, csvConfig);

    trainReader.LoadData(trainData);
    testReader.LoadData(testData);

    var dataset = new ItemRatingDataset(trainData, testData);

    //var featureBuilder = new LibFmFeatureBuilder();

    // step 2: recommender
    var recommender = new LibFmTrainTester();

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Evaluates LibFM on the Epinion train/test files once without cluster files (index 0)
/// and once for each cluster-file index 2, 4, ..., 14, then prints every RMSE followed by every MAE.
/// </summary>
public void TestEpinionClusters()
{
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int fileIndex = 0; fileIndex < 15; fileIndex += 2)
    {
        string userClusters = Paths.EpinionUsersCluster + fileIndex + ".csv";
        string itemClusters = Paths.EpinionItemsCluster + fileIndex + ".csv";

        // Index 0 is the run without cluster files; every other index passes both cluster files.
        bool withoutClusters = fileIndex == 0;

        EpinionReader trainReader = withoutClusters
            ? new EpinionReader(Paths.EpinionTrain75)
            : new EpinionReader(Paths.EpinionTrain75, userClusters, itemClusters);

        EpinionReader testReader = withoutClusters
            ? new EpinionReader(Paths.EpinionTest25)
            : new EpinionReader(Paths.EpinionTest25, userClusters, itemClusters);

        var dataset = new Dataset<EpinionItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();
        var context = new EvalutationContext<ItemRating>(recommender, dataset);

        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Loads "books_selected4.csv" (book domain) and the Amazon music ratings (music domain) into a
/// cross-domain container, then evaluates LibFM once per value in {0, 1, 2, 3, 5, 7, 10}.
/// The value 0 runs LibFM without a feature builder; any other value is passed to
/// <c>CrossDomainLibFmFeatureBuilder</c>. Prints a table of RMSE, MAE and duration (ms).
/// </summary>
public void TestNewDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the book domain as the target - confirm against Domain.
    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);

    // The DVD and video readers are constructed but never loaded (their LoadData calls are disabled below).
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");
    //return;
    //var splitter = new RatingSimpleSplitter(container, 0.25f);

    var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
        // jumps when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        LibFmTrainTester recommender;
        if (num == 0)
        {
            // Baseline run without a feature builder argument.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step 3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        // RMSE is read from the recommender itself, MAE from the evaluation context.
        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.ElapsedMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Evaluates LibFM (RMSE and MAE) on the Amazon books train/test files,
/// passing the ".nmf.u" and ".nmf.i" cluster files to both readers.
/// </summary>
public void TestAmazonWithNmfCluster()
{
    // NOTE(review): both cluster paths are derived from Paths.AmazonBooksUsersCluster,
    // including the ".nmf.i" one - confirm this is intended.
    string userClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.u";
    string itemClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.i";

    var trainReader = new AmazonReader(Paths.AmazonBooksTrain75, userClusterPath, itemClusterPath);
    var testReader = new AmazonReader(Paths.AmazonBooksTest25, userClusterPath, itemClusterPath);
    var dataset = new Dataset<ItemRatingWithClusters>(trainReader, testReader);

    var context = new EvalutationContext<ItemRating>(new LibFmTrainTester(), dataset);

    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads "books_selected4.csv" (book domain) and the Amazon music ratings (music domain) into a
/// cross-domain container, then evaluates LibFM once per value in {0, 1, 2, 3, 5, 7, 10}.
/// The value 0 runs LibFM without a feature builder; any other value is passed to
/// <c>CrossDomainLibFmFeatureBuilder</c>. Prints a table of RMSE, MAE and duration (ms).
/// </summary>
public void TestNewDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the book domain as the target - confirm against Domain.
    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);

    // The DVD and video readers are constructed but never loaded (their LoadData calls are disabled below).
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");
    //return;
    //var splitter = new RatingSimpleSplitter(container, 0.25f);

    var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
        // jumps when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        LibFmTrainTester recommender;
        if (num == 0)
        {
            // Baseline run without a feature builder argument.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step 3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        // RMSE is read from the recommender itself, MAE from the evaluation context.
        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.ElapsedMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads two test domains ("domain1" with its train and test files, "domain2" as an additional
/// domain) into one cross-domain container and evaluates LibFM (RMSE and MAE) with a
/// cross-domain feature builder built for "domain1".
/// </summary>
public void TestCrossDomain()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag on Domain presumably marks it as the target domain, and the
    // trailing 'true' on CsvReader presumably marks test data - confirm against those types.
    var targetDomain = new Domain("domain1", true);
    var auxDomain = new Domain("domain2");

    var trainReader = new CsvReader(Paths.TestDomain1Train, csvConfig, targetDomain);
    var auxReader = new CsvReader(Paths.TestDomain2, csvConfig, auxDomain);
    var testReader = new CsvReader(Paths.TestDomain1Test, csvConfig, targetDomain, true);

    trainReader.LoadData(container);
    auxReader.LoadData(container);
    testReader.LoadData(container);

    var dataset = new ItemRatingDataset(container);
    var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain);

    // step 2: recommender
    var recommender = new LibFmTrainTester(featureBuilder: featureBuilder);

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();

    // featureBuilder.Mapper.OriginalIDs.ToList().ForEach(Console.WriteLine);
    // featureBuilder.Mapper.InternalIDs.ToList().ForEach(Console.WriteLine);
}
/// <summary>
/// Loads "books_selected4.csv" (book domain) and the Amazon music ratings (music domain), then
/// runs ten evaluation rounds. Each round calls <c>musicDomain.ActivateData(0.1f)</c> before
/// evaluating LibFM with a cross-domain feature builder (second argument 10).
/// Prints a table of RMSE, MAE and duration (ms) per round.
/// </summary>
public void TestAuxDataSize()
{
    // Number of evaluation rounds; shared by the run loop and the report loop.
    const int runCount = 10;

    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the book domain as the target - confirm against Domain.
    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);

    // The DVD and video readers are constructed but never loaded (their LoadData calls are disabled below).
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    for (int i = 0; i < runCount; i++)
    {
        // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
        // jumps when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // NOTE(review): presumably each call activates a further 10% of the music data - confirm against Domain.ActivateData.
        musicDomain.ActivateData(0.1f);

        // step 2: recommender
        var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
        var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

        // step 3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        // RMSE is read from the recommender itself, MAE from the evaluation context.
        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.ElapsedMilliseconds).ToString());
    }

    // The first column holds the round index, although the header labels it "NumAuxRatings".
    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < runCount; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Amazon books train and test files into two separate containers and
/// evaluates LibFM (RMSE and MAE) on the dataset built from them.
/// </summary>
public void TestAmazonDatasetSingleNewModel()
{
    // step 1: dataset
    var csvConfig = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };

    var trainData = new DataContainer();
    var testData = new DataContainer();

    var trainReader = new CsvReader(Paths.AmazonBooksTrain75, csvConfig);
    var testReader = new CsvReader(Paths.AmazonBooksTest25, csvConfig);

    trainReader.LoadData(trainData);
    testReader.LoadData(testData);

    var dataset = new ItemRatingDataset(trainData, testData);

    //var featureBuilder = new LibFmFeatureBuilder();

    // step 2: recommender
    var recommender = new LibFmTrainTester();

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads the Amazon video train/test files (video domain) plus the music, DVD and book ratings
/// into a cross-domain container, then evaluates LibFM once per value in {0, 1, 2, 3}.
/// The value 0 runs LibFM without a feature builder; any other value is passed to
/// <c>CrossDomainLibFmFeatureBuilder</c>. Prints a table of RMSE, MAE and duration (ms).
/// </summary>
public void TestAmazonCrossDomainVideo()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the video domain as the target - confirm against Domain.
    var bookDomain = new Domain("book");
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video", true);

    var trainReader = new CsvReader(Paths.AmazonVideoTrain75, config, videoDomain);
    var testReader = new CsvReader(Paths.AmazonVideoTest25, config, videoDomain, true);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);
    //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

    trainReader.LoadData(container);
    testReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    bookReader.LoadData(container);
    //tempReader.LoadData(container);
    //container.ShuffleDomains();

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container);
    //var splitter = new RatingSimpleSplitter(container);

    var numAuxRatings = new[] { 0, 1, 2, 3 };

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
        // jumps when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        ITrainTester<ItemRating> recommender;
        if (num == 0)
        {
            // Baseline run without a feature builder argument.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(videoDomain, num);
            //featureBuilder.LoadCachedUserData(_musicUsersPath);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step 3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(ctx["RMSE"].ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.ElapsedMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numAuxRatings.Length; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Amazon music train/test files (music domain) plus the book, DVD and video ratings
/// into a cross-domain container, then evaluates LibFM once per value in {0, 1, 2, 3}.
/// The value 0 runs LibFM without a feature builder; any other value is passed to
/// <c>CrossDomainLibFmFeatureBuilder</c>. Prints a table of RMSE, MAE and duration (ms).
/// </summary>
public void TestAmazonCrossDomainMusic()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the music domain as the target - confirm against Domain.
    var bookDomain = new Domain("book");
    var musicDomain = new Domain("music", true);
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var trainReader = new CsvReader(Paths.AmazonMusicTrain75, config, musicDomain);
    var testReader = new CsvReader(Paths.AmazonMusicTest25, config, musicDomain, true);
    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config, videoDomain);
    //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

    trainReader.LoadData(container);
    testReader.LoadData(container);
    bookReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);
    //tempReader.LoadData(container);
    //container.ShuffleDomains();

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container);
    //var splitter = new RatingSimpleSplitter(container);

    var numAuxRatings = new List<int> { 0, 1, 2, 3 };

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
        // jumps when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        ITrainTester<ItemRating> recommender;
        if (num == 0)
        {
            // Baseline run without a feature builder argument.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(musicDomain, num);
            //featureBuilder.LoadCachedUserData(_musicUsersPath);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step 3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(ctx["RMSE"].ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.ElapsedMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Evaluates LibFM on the MovieLens train/test files once without cluster files (index 0)
/// and once for each cluster-file index 2, 4, ..., 14, then prints every RMSE followed by every MAE.
/// </summary>
public void TestMovieLensWithClusters()
{
    // Todo: instead of loading the dataset every time add a updateCluster method to the Dataset<ItemRatingWithCluster>
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int fileIndex = 0; fileIndex < 15; fileIndex += 2)
    {
        string userClusters = Paths.MovieLens1MUsersCluster + fileIndex + ".csv";
        string itemClusters = Paths.MovieLens1MItemsCluster + fileIndex + ".csv";

        // Index 0 is the run without cluster files; every other index passes both cluster files.
        bool withoutClusters = fileIndex == 0;

        MovieLensReader trainReader = withoutClusters
            ? new MovieLensReader(Paths.MovieLens1MTrain75)
            : new MovieLensReader(Paths.MovieLens1MTrain75, userClusters, itemClusters);

        MovieLensReader testReader = withoutClusters
            ? new MovieLensReader(Paths.MovieLens1MTest25)
            : new MovieLensReader(Paths.MovieLens1MTest25, userClusters, itemClusters);

        var dataset = new Dataset<MovieLensItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();
        var context = new EvalutationContext<ItemRating>(recommender, dataset);

        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Loads MovieLens into a single-domain cross-domain container, marks "ml0" as the target domain,
/// evaluates LibFM (RMSE and MAE) on a <c>CrossDomainSimpleSplitter</c> split and prints the RMSE
/// together with the elapsed time in milliseconds.
/// </summary>
public void TestMovieLensSingleDomain()
{
    int numDomains = 1;

    // load data
    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains);
    movieLensReader.LoadData(container);

    // set target and active domains (the returned domain is not needed here)
    container.SpecifyTargetDomain("ml0");

    container.PrintStatistics();

    // Stopwatch is used for elapsed time: DateTime.Now has coarse resolution and
    // jumps when the system clock is adjusted.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

    // recommender with non-CrossDomain feature builder
    // Disabled alternative (its previously unused model object has been removed):
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization { NumIter = 50, NumFactors = 8, Regularization = 0.1f });
    var recommender = new LibFmTrainTester();

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    // Only the RMSE is printed, although the MAE evaluator also ran.
    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);

    //container.CreateClusterFiles(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.raw", Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.feat");
    //container.WriteClusters(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust");
}
/// <summary>
/// Runs the cross-domain LibFm experiment on the Epinions data: every domain left in the
/// container (after removing "ep0") is used once as target domain and evaluated for each
/// configured number of auxiliary ratings. RMSE values, per-domain statistics and run
/// times are printed as tab-separated tables.
/// </summary>
/// <param name="numDomains">
/// Number of domains passed to the container; also used as the column count of the
/// result tables.
/// </param>
public void TestEpinionAllDomains(int numDomains = 3)
{
    var numAuxRatings = new List<int> { 0, 1, 2, 3, 4 };

    var epinionsReader = new EpinionsCrossDomainReader(Paths.EpinionRoot + "Epinions RED");
    //var domainPaths = Enumerable.Range(1, numDomains)
    //    .Select(i => string.Format("{0}Epinions RED\\Domains{1}-{2}.csv", Paths.EpinionRoot, numDomains, i)).ToArray();
    //var epinionsReader = new EpinionsCrossDomainReader(domainPaths);

    var container = new EpinionsCrossDomainDataContainer(numDomains);
    epinionsReader.LoadData(container);
    container.Domains.Remove("ep0");

    // NOTE(review): the result arrays are sized by numDomains, so this assumes the container
    // holds at most numDomains domains once "ep0" is removed; confirm against the container.
    double[,] rmseMatrix = new double[numAuxRatings.Count, numDomains];
    int[,] durationsMatrix = new int[numAuxRatings.Count, numDomains];
    int[] numUsers = new int[numDomains];
    int[] numItems = new int[numDomains];
    int[] numRatings = new int[numDomains];

    int domainIndex = 0;
    foreach (Domain d in container.Domains.Values)
    {
        var targetDomain = container.SpecifyTargetDomain(d.Id);
        Console.WriteLine("Target domain: {0}", d.ToString());

        var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

        int numAuxIndex = 0;
        foreach (var num in numAuxRatings)
        {
            // Stopwatch is monotonic and high-resolution; DateTime.Now differences are
            // coarse and change with system clock / DST adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Zero auxiliary ratings: run LibFm without a cross-domain feature builder.
            LibFmTrainTester recommender;
            if (num == 0)
            {
                recommender = new LibFmTrainTester(experimentId: num.ToString());
            }
            else
            {
                var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain, num);
                recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
            }

            var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
            var ep = new EvaluationPipeline<ItemRating>(ctx);
            ep.Evaluators.Add(new RMSE());
            ep.Run();

            stopwatch.Stop();

            rmseMatrix[numAuxIndex, domainIndex] = recommender.RMSE;
            durationsMatrix[numAuxIndex, domainIndex] = (int)stopwatch.Elapsed.TotalMilliseconds;
            numAuxIndex++;
        }

        numUsers[domainIndex] = d.Ratings.Select(r => r.User.Id).Distinct().Count();
        numItems[domainIndex] = d.Ratings.Select(r => r.Item.Id).Distinct().Count();
        numRatings[domainIndex] = d.Ratings.Count;

        domainIndex++;
    }

    // Write RMSEs
    // string.Join, unlike a seedless Aggregate, does not throw when numDomains is 0.
    Console.WriteLine("\nRMSEs:\n");
    string header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "D" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);
        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + rmseMatrix[i, j]);
        }
        Console.WriteLine();
    }

    // Write domain statistics
    string users = string.Join("\t", numUsers.Select(c => c.ToString()));
    string items = string.Join("\t", numItems.Select(c => c.ToString()));
    string ratings = string.Join("\t", numRatings.Select(c => c.ToString()));

    Console.WriteLine();
    Console.WriteLine("Num Users\t" + users);
    Console.WriteLine("Num Items\t" + items);
    Console.WriteLine("Num Ratings\t" + ratings);

    // Write times
    Console.WriteLine("\nTimes:\n");
    header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "T" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);
        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + durationsMatrix[i, j]);
        }
        Console.WriteLine();
    }

    Console.WriteLine("\n");
}
/// <summary>
/// Repeatedly evaluates cross-domain LibFm with "book" as the domain handed to the feature
/// builder, calling <c>musicDomain.ActivateData(0.1f)</c> before every run, and prints RMSE,
/// MAE and elapsed milliseconds per run as a tab-separated table.
/// </summary>
/// <remarks>
/// Only the book and music readers load data; the dvd and video readers are constructed
/// but their <c>LoadData</c> calls are commented out.
/// </remarks>
public void TestAuxDataSize()
{
    // Number of evaluation runs; shared by the run loop and the report loop.
    const int numRuns = 10;

    // step 1: dataset
    var config = new CsvConfiguration()
    {
        Delimiter = ",",
        HasHeaderRecord = true
    };

    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);

    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);

    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    for (int i = 0; i < numRuns; i++)
    {
        // Stopwatch is monotonic and high-resolution; DateTime.Now differences are
        // coarse and change with system clock / DST adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // Executed on every run, so the timing below includes this call.
        musicDomain.ActivateData(0.1f);

        // step 2: recommender
        var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
        var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numRuns; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Runs the cross-domain LibFm experiment on MovieLens 1M: every domain in the container
/// is used once as target domain and evaluated for each configured number of auxiliary
/// ratings. RMSE values, per-domain statistics and run times are printed as tab-separated
/// tables.
/// </summary>
/// <param name="numDomains">
/// Number of domains passed to the container; also used as the column count of the
/// result tables.
/// </param>
public void TestMovieLensAllDomains(int numDomains)
{
    var numAuxRatings = new List<int> { 1 };

    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains, false);
    movieLensReader.LoadData(container);

    // NOTE(review): the result arrays are sized by numDomains, so this assumes the container
    // holds at most numDomains domains; confirm against the container.
    double[,] rmseMatrix = new double[numAuxRatings.Count, numDomains];
    int[,] durationsMatrix = new int[numAuxRatings.Count, numDomains];
    int[] numUsers = new int[numDomains];
    int[] numItems = new int[numDomains];
    int[] numRatings = new int[numDomains];

    int domainIndex = 0;
    foreach (Domain d in container.Domains.Values)
    {
        var targetDomain = container.SpecifyTargetDomain(d.Id);
        Console.WriteLine("Target domain: {0}", d.ToString());

        var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

        int numAuxIndex = 0;
        foreach (var num in numAuxRatings)
        {
            // Stopwatch is monotonic and high-resolution; DateTime.Now differences are
            // coarse and change with system clock / DST adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Zero auxiliary ratings: run LibFm without a cross-domain feature builder.
            LibFmTrainTester recommender;
            if (num == 0)
            {
                recommender = new LibFmTrainTester(experimentId: num.ToString());
            }
            else
            {
                var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain, num);
                recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
            }

            var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
            var ep = new EvaluationPipeline<ItemRating>(ctx);
            ep.Evaluators.Add(new RMSE());
            ep.Run();

            stopwatch.Stop();

            rmseMatrix[numAuxIndex, domainIndex] = recommender.RMSE;
            durationsMatrix[numAuxIndex, domainIndex] = (int)stopwatch.Elapsed.TotalMilliseconds;
            numAuxIndex++;
        }

        numUsers[domainIndex] = d.Ratings.Select(r => r.User.Id).Distinct().Count();
        numItems[domainIndex] = d.Ratings.Select(r => r.Item.Id).Distinct().Count();
        numRatings[domainIndex] = d.Ratings.Count;

        domainIndex++;
    }

    // Write RMSEs
    // string.Join, unlike a seedless Aggregate, does not throw when numDomains is 0.
    Console.WriteLine("\nRMSEs:\n");
    string header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "D" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);
        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + rmseMatrix[i, j]);
        }
        Console.WriteLine();
    }

    // Write domain statistics
    string users = string.Join("\t", numUsers.Select(c => c.ToString()));
    string items = string.Join("\t", numItems.Select(c => c.ToString()));
    string ratings = string.Join("\t", numRatings.Select(c => c.ToString()));

    Console.WriteLine();
    Console.WriteLine("Num Users\t" + users);
    Console.WriteLine("Num Items\t" + items);
    Console.WriteLine("Num Ratings\t" + ratings);

    // Write times
    Console.WriteLine("\nTimes:\n");
    header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "T" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);
        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + durationsMatrix[i, j]);
        }
        Console.WriteLine();
    }

    Console.WriteLine("\n");
}
/// <summary>
/// Loads the Amazon book, music, dvd and video rating files into one shared container,
/// evaluates LibFm on a simple rating split and prints the RMSE and the elapsed time in
/// milliseconds.
/// </summary>
/// <remarks>
/// The MAE evaluator is registered as well, but only the RMSE is printed.
/// </remarks>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration()
    {
        Delimiter = ",",
        HasHeaderRecord = true
    };

    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is monotonic and high-resolution; DateTime.Now differences are coarse
    // and change with system clock / DST adjustments. Timing starts after loading and
    // splitting, so it covers only recommender construction and the evaluation run.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var recommender = new LibFmTrainTester();
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());

    // step3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.Elapsed.TotalMilliseconds);
}