/// <summary>
/// Loads the MovieLens 1M train/test CSV files ("::"-delimited, with header) into one
/// <c>DataContainer</c>, evaluates a <c>LibFmTrainTester</c> with the RMSE evaluator and
/// prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after both files have been loaded, so it covers the splitter setup,
/// recommender construction and the evaluation pipeline only.
/// </remarks>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = "::", HasHeaderRecord = true };

    // load data
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    // NOTE(review): the trailing `true` presumably marks this reader as the test set -- confirm against CsvReader.
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);

    var container = new DataContainer();
    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    // Alternative baseline: new MediaLiteRatingPredictor(new MatrixFactorization())
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// For every implicit trust-score file, trains a <c>SocialMF</c> recommender
/// (SocialRegularization = 1) with 5 and 10 factors on the ratings read from
/// <c>Paths.EpinionTrain80</c> / <c>Paths.EpinionTest20</c>, and prints one line per score
/// file containing the tab-separated RMSE values followed by the MAE values.
/// </summary>
public void TestImplicitTrust()
{
    var numFactors = new uint[] { 5, 10 };
    var trustScores = new string[]
    {
        "trust_values_LATHIA.dat",
        "trust_values_HWANGCHEN.dat",
        "trust_values_ODONOVAN.dat",
        "trust_values_PEARSON.dat",
        "trust_values_SHAMBOURLU.dat"
    };

    // step 1: dataset
    var config = new CsvConfiguration();
    config.Delimiter = " ";

    var trainReader = new CsvReader<ItemRating>(Paths.EpinionTrain80, config, new ItemRatingMap());
    var testReader = new CsvReader<ItemRating>(Paths.EpinionTest20, config, new ItemRatingMap());
    var dataset = new Dataset<ItemRating>(trainReader, testReader);

    foreach (string scoreFile in trustScores)
    {
        // Materialise once per score file: the lazy query would otherwise re-parse every
        // line for each recommender that enumerates it.
        // Strengths are parsed with the invariant culture so "0.5" is read the same way
        // regardless of the machine's regional settings.
        var relations = File.ReadAllLines(Paths.EpinionRelationsImplicit + scoreFile)
            .ToCsvDictionary('\t')
            .Select(i => new Relation()
            {
                UserId = i["UserId"],
                ConnectedId = i["ConnectionId"],
                ConnectionStrength = float.Parse(i["Strength"], System.Globalization.CultureInfo.InvariantCulture)
            })
            .ToList();
        // Optional filter that was disabled in the original: .Where(r => r.ConnectionStrength > 1F)

        string rmseValues = "", maeValues = "";

        foreach (uint num in numFactors)
        {
            // step 2: recommender
            var algorithm = new SocialMF();
            algorithm.SocialRegularization = 1;
            algorithm.NumFactors = num;

            var recommender = new MediaLiteRatingPredictor(algorithm, relations);

            // step3: evaluation
            var context = new EvalutationContext<ItemRating>(recommender, dataset);
            var ep = new EvaluationPipeline<ItemRating>(context);
            ep.Evaluators.Add(new RMSE());
            ep.Evaluators.Add(new MAE());
            ep.Run();

            rmseValues += context["RMSE"] + "\t";
            maeValues += context["MAE"] + "\t";
        }

        Console.WriteLine(scoreFile + "\t" + rmseValues + "\t" + maeValues);
    }
}
/// <summary>
/// Trains a <c>MatrixFactorization</c> model for every (factor count, social regularisation)
/// combination on the ratings read from <c>Paths.EpinionTrain75</c> / <c>Paths.EpinionTest25</c>,
/// passing the relations read from <c>Paths.EpinionRelations</c>, and prints one line per
/// factor count with the tab-separated RMSE values followed by the MAE values.
/// </summary>
/// <remarks>
/// The regularisation value is iterated but not applied: the assignment to
/// <c>SocialRegularization</c> is disabled below.
/// </remarks>
public void TestExplicitTrust()
{
    // Wider grids that were used previously:
    //   socialReguls = { 0.1F, 0.2F, 0.5F, 0.8F, 1F, 1.5F, 2F, 3F, 5F }
    //   numFactors   = { 2, 5, 10, 15, 20 }
    var socialReguls = new float[] { 1 };
    var numFactors = new uint[] { 5, 10 };

    // step 1: dataset
    var config = new CsvConfiguration();
    config.Delimiter = " ";

    var trainReader = new CsvReader<ItemRating>(Paths.EpinionTrain75, config, new ItemRatingMap());
    var testReader = new CsvReader<ItemRating>(Paths.EpinionTest25, config, new ItemRatingMap());
    var dataset = new Dataset<ItemRating>(trainReader, testReader);

    // Materialise once: the lazy query would otherwise re-parse the relation file for
    // every recommender that enumerates it inside the loops below.
    var relations = File.ReadAllLines(Paths.EpinionRelations)
        .ToCsvDictionary(' ')
        .Select(i => new Relation() { UserId = i["UserId"], ConnectedId = i["ConnectionId"], DatasetId = 1 })
        .ToList();

    foreach (uint num in numFactors)
    {
        string rmseValues = "", maeValues = "";

        foreach (float regul in socialReguls)
        {
            // step 2: recommender
            var algorithm = new MatrixFactorization();
            algorithm.NumFactors = num;
            //algorithm.SocialRegularization = regul;

            var recommender = new MediaLiteRatingPredictor(algorithm, relations);

            // step3: evaluation
            var context = new EvalutationContext<ItemRating>(recommender, dataset);
            var ep = new EvaluationPipeline<ItemRating>(context);
            ep.Evaluators.Add(new RMSE());
            ep.Evaluators.Add(new MAE());
            ep.Run();

            rmseValues += context["RMSE"] + "\t";
            maeValues += context["MAE"] + "\t";
        }

        Console.WriteLine(num + "\t" + rmseValues + "\t" + maeValues);
    }
}
/// <summary>
/// Runs the default train-and-test step on <paramref name="context"/> and writes one
/// comma-separated line per test sample (Twitter user id, tweet id, predicted rating)
/// to the configured output file.
/// </summary>
/// <param name="context">Evaluation context whose test samples are predicted and exported.</param>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
public void Evaluate(EvalutationContext<ItemRating> context)
{
    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    // make sure that the test samples are predicted
    context.RunDefaultTrainAndTest();

    var output = context.Dataset.TestSamples.AsEnumerable()
        .Select(ir =>
        {
            var t = _container.Tweets[ir];

            // Invariant culture: a decimal comma from the current culture would add an
            // extra column to this comma-separated line.
            return string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0},{1},{2}",
                t.TwitterUserId,
                t.Id,
                ir.PredictedRating);
        });

    File.WriteAllLines(_outputFile, output);
}
/// <summary>
/// Evaluates a <c>LibFmTrainTester</c> on the MovieLens 1M train/test files for the cluster
/// indices 0, 2, ..., 14 and prints the collected RMSE values followed by the MAE values.
/// </summary>
/// <remarks>
/// Index 0 builds the readers without cluster files; every other index passes the user and
/// item cluster files whose names end in that index.
/// </remarks>
public void TestMovieLensWithClusters()
{
    // Todo: instead of loading the dataset every time add a updateCluster method to the Dataset<ItemRatingWithCluster>
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int clusterIndex = 0; clusterIndex <= 14; clusterIndex += 2)
    {
        string userClusters = Paths.MovieLens1MUsersCluster + clusterIndex + ".csv";
        string itemClusters = Paths.MovieLens1MItemsCluster + clusterIndex + ".csv";

        bool plainRatingsOnly = clusterIndex == 0;

        MovieLensReader trainReader = plainRatingsOnly
            ? new MovieLensReader(Paths.MovieLens1MTrain75)
            : new MovieLensReader(Paths.MovieLens1MTrain75, userClusters, itemClusters);
        MovieLensReader testReader = plainRatingsOnly
            ? new MovieLensReader(Paths.MovieLens1MTest25)
            : new MovieLensReader(Paths.MovieLens1MTest25, userClusters, itemClusters);

        var dataset = new Dataset<MovieLensItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();

        var context = new EvalutationContext<ItemRating>(recommender, dataset);
        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Evaluates a <c>LibFmTrainTester</c> on the Epinions train/test files for the cluster
/// indices 0, 2, ..., 14 and prints the collected RMSE values followed by the MAE values.
/// </summary>
/// <remarks>
/// Index 0 builds the readers without cluster files; every other index passes the user and
/// item cluster files whose names end in that index.
/// </remarks>
public void TestEpinionClusters()
{
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int clusterIndex = 0; clusterIndex <= 14; clusterIndex += 2)
    {
        string userClusters = Paths.EpinionUsersCluster + clusterIndex + ".csv";
        string itemClusters = Paths.EpinionItemsCluster + clusterIndex + ".csv";

        bool plainRatingsOnly = clusterIndex == 0;

        EpinionReader trainReader = plainRatingsOnly
            ? new EpinionReader(Paths.EpinionTrain75)
            : new EpinionReader(Paths.EpinionTrain75, userClusters, itemClusters);
        EpinionReader testReader = plainRatingsOnly
            ? new EpinionReader(Paths.EpinionTest25)
            : new EpinionReader(Paths.EpinionTest25, userClusters, itemClusters);

        var dataset = new Dataset<EpinionItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();

        var context = new EvalutationContext<ItemRating>(recommender, dataset);
        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Builds a dataset from the Amazon books train/test files plus the ".nmf.u" / ".nmf.i"
/// cluster files and runs a <c>LibFmTrainTester</c> through the RMSE and MAE evaluators.
/// </summary>
public void TestAmazonWithNmfCluster()
{
    // NOTE(review): both cluster paths are derived from AmazonBooksUsersCluster, including
    // the ".nmf.i" one -- confirm that this shared prefix is intended.
    string userClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.u";
    string itemClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.i";

    AmazonReader trainReader = new AmazonReader(Paths.AmazonBooksTrain75, userClusterPath, itemClusterPath);
    AmazonReader testReader = new AmazonReader(Paths.AmazonBooksTest25, userClusterPath, itemClusterPath);

    var dataset = new Dataset<ItemRatingWithClusters>(trainReader, testReader);
    var recommender = new LibFmTrainTester();

    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Loads the Epinions train/test CSV files through an <c>EpinionTrustReader</c>, evaluates a
/// <c>LibFmTrainTester</c> that uses a <c>TrustAwareLibFmFeatureBuilder</c> with the RMSE
/// evaluator, and prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after the data has been loaded.
/// </remarks>
public void TestEpinionsTrustAware()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = " ", HasHeaderRecord = true };

    // load data
    var trainReader = new CsvReader(Paths.EpinionTrain80, config);
    // NOTE(review): the trailing `true` presumably marks this reader as the test set -- confirm against CsvReader.
    var testReader = new CsvReader(Paths.EpinionTest20, config, true);

    // Build the array directly instead of going through a temporary List + ToArray().
    var readers = new IDatasetReader[] { trainReader, testReader };
    var epinionTrustReader = new EpinionTrustReader(readers, Paths.EpinionRelationsImplicit);

    var container = new DataContainer();
    epinionTrustReader.LoadData(container);

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    // Alternative baseline: new MediaLiteRatingPredictor(new MatrixFactorization())
    // NOTE(review): the meaning of the `4, true` arguments is defined by TrustAwareLibFmFeatureBuilder -- see its constructor.
    var fb = new TrustAwareLibFmFeatureBuilder(container, 4, true);
    var recommender = new LibFmTrainTester(featureBuilder: fb);

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Loads MovieLens 1M into a single-domain <c>MovieLensCrossDomainContainer</c>, selects
/// "ml0" as the target domain, evaluates a <c>LibFmTrainTester</c> on a 0.25 split with the
/// RMSE and MAE evaluators, and prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after loading and after the container statistics have been printed.
/// </remarks>
public void TestMovieLensSingleDomain()
{
    int numDomains = 1;

    // load data
    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains);
    movieLensReader.LoadData(container);

    // set target and active domains (the returned domain object is not needed here)
    container.SpecifyTargetDomain("ml0");

    container.PrintStatistics();

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

    // recommender with non-CrossDomain feature builder.
    // A MatrixFactorization model (NumIter = 50, NumFactors = 8, Regularization = 0.1f) used to be
    // configured here for MediaLiteRatingPredictor, but it was never passed to any recommender.
    var recommender = new LibFmTrainTester();

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);

    //container.CreateClusterFiles(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.raw", Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.feat");
    //container.WriteClusters(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust");
}
/// <summary>
/// Loads the playing sessions found at <c>path</c> into a <c>MusicDataContainer</c> and runs a
/// <c>LibFmTrainTester</c> on a 0.25 rating split through the RMSE and MAE evaluators.
/// </summary>
static void TrainTestFm()
{
    PlayingSessionReader sessionReader = new PlayingSessionReader(path);
    MusicDataContainer musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    RatingSimpleSplitter ratingSplitter = new RatingSimpleSplitter(musicData, 0.25f);

    // Alternative that is currently disabled: a MostPopular item recommender wrapped in
    // MediaLitePosFeedbakItemRecommender.
    LibFmTrainTester trainTester = new LibFmTrainTester();

    EvalutationContext<ItemRating> evaluationContext = new EvalutationContext<ItemRating>(trainTester, ratingSplitter);
    EvaluationPipeline<ItemRating> pipeline = new EvaluationPipeline<ItemRating>(evaluationContext);

    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());

    pipeline.Run();
}
/// <summary>
/// Loads the playing sessions found at <c>path</c> into a <c>MusicDataContainer</c> and runs a
/// <c>LibFmTrainTester</c> on a 0.25 rating split through the RMSE and MAE evaluators.
/// </summary>
static void TrainTestFm()
{
    PlayingSessionReader sessionReader = new PlayingSessionReader(path);
    MusicDataContainer musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    RatingSimpleSplitter ratingSplitter = new RatingSimpleSplitter(musicData, 0.25f);

    // Alternative that is currently disabled: a MostPopular item recommender wrapped in
    // MediaLitePosFeedbakItemRecommender.
    LibFmTrainTester trainTester = new LibFmTrainTester();

    EvalutationContext<ItemRating> evaluationContext = new EvalutationContext<ItemRating>(trainTester, ratingSplitter);
    EvaluationPipeline<ItemRating> pipeline = new EvaluationPipeline<ItemRating>(evaluationContext);

    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());

    pipeline.Run();
}
/// <summary>
/// Loads the playing sessions found at <c>path</c> into a <c>MusicDataContainer</c> and runs the
/// default train-and-test step of a <c>PosFeedbackLibFmTrainTester</c> on a 0.3
/// positive-feedback split.
/// </summary>
static void TrainTest()
{
    PlayingSessionReader sessionReader = new PlayingSessionReader(path);
    MusicDataContainer musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    PositiveFeedbackSimpleSplitter feedbackSplitter = new PositiveFeedbackSimpleSplitter(musicData, 0.3f);

    // Alternative that is currently disabled: a MostPopular item recommender wrapped in
    // MediaLitePosFeedbakItemRecommender, evaluated through an EvaluationPipeline with
    // MediaLitePositiveFeedbackEvaluators.
    PosFeedbackLibFmTrainTester trainTester = new PosFeedbackLibFmTrainTester();

    EvalutationContext<PositiveFeedback> evaluationContext =
        new EvalutationContext<PositiveFeedback>(trainTester, feedbackSplitter);

    evaluationContext.RunDefaultTrainAndTest();
}
/// <summary>
/// Loads the playing sessions found at <c>path</c> into a <c>MusicDataContainer</c> and runs the
/// default train-and-test step of a <c>PosFeedbackLibFmTrainTester</c> on a 0.3
/// positive-feedback split.
/// </summary>
static void TrainTest()
{
    PlayingSessionReader sessionReader = new PlayingSessionReader(path);
    MusicDataContainer musicData = new MusicDataContainer();
    sessionReader.LoadData(musicData);

    PositiveFeedbackSimpleSplitter feedbackSplitter = new PositiveFeedbackSimpleSplitter(musicData, 0.3f);

    // Alternative that is currently disabled: a MostPopular item recommender wrapped in
    // MediaLitePosFeedbakItemRecommender, evaluated through an EvaluationPipeline with
    // MediaLitePositiveFeedbackEvaluators.
    PosFeedbackLibFmTrainTester trainTester = new PosFeedbackLibFmTrainTester();

    EvalutationContext<PositiveFeedback> evaluationContext =
        new EvalutationContext<PositiveFeedback>(trainTester, feedbackSplitter);

    evaluationContext.RunDefaultTrainAndTest();
}
/// <summary>
/// Loads the Amazon book, music, dvd and video rating files into one <c>DataContainer</c>,
/// evaluates a <c>LibFmTrainTester</c> on a 0.25 split with the RMSE and MAE evaluators, and
/// prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after the data is loaded and the splitter has been created.
/// </remarks>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Alternative baseline: new MediaLiteRatingPredictor(new MatrixFactorization())
    var recommender = new LibFmTrainTester();

    // step3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.Elapsed.TotalMilliseconds);
}
/// <summary>
/// Wraps the Epinions train/test readers in <c>ItemRatingWithRelationReader</c>s that share the
/// relations parsed from <c>Paths.EpinionRelationsImplicit</c>, and runs a
/// <c>LibFmTrainTester</c> through the RMSE evaluator.
/// </summary>
public void TestTrustWithFM()
{
    // step 1: dataset
    var config = new CsvConfiguration();
    config.Delimiter = " ";

    var trainReader = new CsvReader<ItemRating>(Paths.EpinionTrain80, config, new ItemRatingMap());
    var testReader = new CsvReader<ItemRating>(Paths.EpinionTest20, config, new ItemRatingMap());

    // Strengths are parsed with the invariant culture so "0.5" is read the same way
    // regardless of the machine's regional settings.
    var relations = File.ReadAllLines(Paths.EpinionRelationsImplicit)
        .ToCsvDictionary('\t')
        .Select(i => new Relation()
        {
            UserId = i["UserId"],
            ConnectedId = i["ConnectionId"],
            ConnectionStrength = float.Parse(i["Strength"], System.Globalization.CultureInfo.InvariantCulture)
        })
        .ToList();
    // Optional filter that was disabled in the original: .Where(r => r.ConnectionStrength > 1F)

    var trainWithRelations = new ItemRatingWithRelationReader(trainReader, relations);
    var testWithRelations = new ItemRatingWithRelationReader(testReader, relations);

    var dataset = new Dataset<ItemRatingWithRelations>(trainWithRelations, testWithRelations);

    Console.WriteLine("Features constructed.");

    // step 2: recommender
    var recommender = new LibFmTrainTester();

    // step3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var ep = new EvaluationPipeline<ItemRating>(context);
    ep.Evaluators.Add(new RMSE());
    ep.Run();
}
/// <summary>
/// Loads the MovieLens 1M train/test CSV files ("::"-delimited, with header) into one
/// <c>DataContainer</c>, evaluates a <c>LibFmTrainTester</c> with the RMSE evaluator and
/// prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after both files have been loaded, so it covers the splitter setup,
/// recommender construction and the evaluation pipeline only.
/// </remarks>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = "::", HasHeaderRecord = true };

    // load data
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    // NOTE(review): the trailing `true` presumably marks this reader as the test set -- confirm against CsvReader.
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);

    var container = new DataContainer();
    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    // Alternative baseline: new MediaLiteRatingPredictor(new MatrixFactorization())
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Splits MovieLens 1M into <paramref name="numDomains"/> domains, uses each domain in turn as
/// the target domain, evaluates a <c>LibFmTrainTester</c> for every configured number of
/// auxiliary ratings, and prints three tab-separated tables: RMSEs, per-domain
/// user/item/rating counts, and run times in milliseconds.
/// </summary>
/// <param name="numDomains">
/// Number of domains passed to the container; also the column count of the result tables.
/// NOTE(review): the tables are indexed by the position of each entry in
/// <c>container.Domains</c>, so this assumes the container exposes at most this many domains.
/// </param>
public void TestMovieLensAllDomains(int numDomains)
{
    var numAuxRatings = new List<int> { 1 };

    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains, false);
    movieLensReader.LoadData(container);

    double[,] rmseMatrix = new double[numAuxRatings.Count, numDomains];
    int[,] durationsMatrix = new int[numAuxRatings.Count, numDomains];

    int[] numUsers = new int[numDomains];
    int[] numItems = new int[numDomains];
    int[] numRatings = new int[numDomains];

    int domainIndex = 0;
    foreach (Domain d in container.Domains.Values)
    {
        var targetDomain = container.SpecifyTargetDomain(d.Id);
        Console.WriteLine("Target domain: {0}", d.ToString());

        var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

        for (int numAuxIndex = 0; numAuxIndex < numAuxRatings.Count; numAuxIndex++)
        {
            int num = numAuxRatings[numAuxIndex];

            // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            LibFmTrainTester recommender;
            if (num == 0)
            {
                // No auxiliary ratings: use the recommender without a cross-domain feature builder.
                recommender = new LibFmTrainTester(experimentId: num.ToString());
            }
            else
            {
                var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain, num);
                recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
            }

            var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
            var ep = new EvaluationPipeline<ItemRating>(ctx);
            ep.Evaluators.Add(new RMSE());
            ep.Run();

            stopwatch.Stop();

            rmseMatrix[numAuxIndex, domainIndex] = recommender.RMSE;
            durationsMatrix[numAuxIndex, domainIndex] = (int)stopwatch.Elapsed.TotalMilliseconds;
        }

        numUsers[domainIndex] = d.Ratings.Select(r => r.User.Id).Distinct().Count();
        numItems[domainIndex] = d.Ratings.Select(r => r.Item.Id).Distinct().Count();
        numRatings[domainIndex] = d.Ratings.Count;

        domainIndex++;
    }

    // string.Join is used for every tab-separated row: unlike a seedless Aggregate it does
    // not throw when there are no columns (numDomains == 0).

    // Write RMSEs
    Console.WriteLine("\nRMSEs:\n");

    string header = string.Join("\t", Enumerable.Range(1, numDomains).Select(i => "D" + i).ToArray());
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);

        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + rmseMatrix[i, j]);
        }

        Console.WriteLine();
    }

    // Write domain statistics
    string users = string.Join("\t", numUsers.Select(c => c.ToString()).ToArray());
    string items = string.Join("\t", numItems.Select(c => c.ToString()).ToArray());
    string ratings = string.Join("\t", numRatings.Select(c => c.ToString()).ToArray());

    Console.WriteLine();
    Console.WriteLine("Num Users\t" + users);
    Console.WriteLine("Num Items\t" + items);
    Console.WriteLine("Num Ratings\t" + ratings);

    // Write times
    Console.WriteLine("\nTimes:\n");

    header = string.Join("\t", Enumerable.Range(1, numDomains).Select(i => "T" + i).ToArray());
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);

        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + durationsMatrix[i, j]);
        }

        Console.WriteLine();
    }

    Console.WriteLine("\n");
}
/// <summary>
/// Builds a dataset from the Amazon books train/test files plus the ".nmf.u" / ".nmf.i"
/// cluster files and runs a <c>LibFmTrainTester</c> through the RMSE and MAE evaluators.
/// </summary>
public void TestAmazonWithNmfCluster()
{
    // NOTE(review): both cluster paths are derived from AmazonBooksUsersCluster, including
    // the ".nmf.i" one -- confirm that this shared prefix is intended.
    string userClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.u";
    string itemClusterPath = Paths.AmazonBooksUsersCluster + ".nmf.i";

    AmazonReader trainReader = new AmazonReader(Paths.AmazonBooksTrain75, userClusterPath, itemClusterPath);
    AmazonReader testReader = new AmazonReader(Paths.AmazonBooksTest25, userClusterPath, itemClusterPath);

    var dataset = new Dataset<ItemRatingWithClusters>(trainReader, testReader);
    var recommender = new LibFmTrainTester();

    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());
    pipeline.Run();
}
/// <summary>
/// Evaluates a <c>LibFmTrainTester</c> on the Epinions train/test files for the cluster
/// indices 0, 2, ..., 14 and prints the collected RMSE values followed by the MAE values.
/// </summary>
/// <remarks>
/// Index 0 builds the readers without cluster files; every other index passes the user and
/// item cluster files whose names end in that index.
/// </remarks>
public void TestEpinionClusters()
{
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int clusterIndex = 0; clusterIndex <= 14; clusterIndex += 2)
    {
        string userClusters = Paths.EpinionUsersCluster + clusterIndex + ".csv";
        string itemClusters = Paths.EpinionItemsCluster + clusterIndex + ".csv";

        bool useClusters = clusterIndex != 0;

        EpinionReader trainReader = useClusters
            ? new EpinionReader(Paths.EpinionTrain75, userClusters, itemClusters)
            : new EpinionReader(Paths.EpinionTrain75);
        EpinionReader testReader = useClusters
            ? new EpinionReader(Paths.EpinionTest25, userClusters, itemClusters)
            : new EpinionReader(Paths.EpinionTest25);

        var dataset = new Dataset<EpinionItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();

        var context = new EvalutationContext<ItemRating>(recommender, dataset);
        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Evaluates a <c>LibFmTrainTester</c> on the MovieLens 1M train/test files for the cluster
/// indices 0, 2, ..., 14 and prints the collected RMSE values followed by the MAE values.
/// </summary>
/// <remarks>
/// Index 0 builds the readers without cluster files; every other index passes the user and
/// item cluster files whose names end in that index.
/// </remarks>
public void TestMovieLensWithClusters()
{
    // Todo: instead of loading the dataset every time add a updateCluster method to the Dataset<ItemRatingWithCluster>
    var rmseResults = new List<string>();
    var maeResults = new List<string>();

    for (int clusterIndex = 0; clusterIndex <= 14; clusterIndex += 2)
    {
        string userClusters = Paths.MovieLens1MUsersCluster + clusterIndex + ".csv";
        string itemClusters = Paths.MovieLens1MItemsCluster + clusterIndex + ".csv";

        bool useClusters = clusterIndex != 0;

        MovieLensReader trainReader = useClusters
            ? new MovieLensReader(Paths.MovieLens1MTrain75, userClusters, itemClusters)
            : new MovieLensReader(Paths.MovieLens1MTrain75);
        MovieLensReader testReader = useClusters
            ? new MovieLensReader(Paths.MovieLens1MTest25, userClusters, itemClusters)
            : new MovieLensReader(Paths.MovieLens1MTest25);

        var dataset = new Dataset<MovieLensItemRating>(trainReader, testReader);
        var recommender = new LibFmTrainTester();

        var context = new EvalutationContext<ItemRating>(recommender, dataset);
        var pipeline = new EvaluationPipeline<ItemRating>(context);
        pipeline.Evaluators.Add(new RMSE());
        pipeline.Evaluators.Add(new MAE());
        pipeline.Run();

        rmseResults.Add(context["RMSE"].ToString());
        maeResults.Add(context["MAE"].ToString());
    }

    Console.WriteLine("RMSEs--------------");
    foreach (string value in rmseResults)
    {
        Console.WriteLine(value);
    }

    Console.WriteLine("MAEs-------------");
    foreach (string value in maeResults)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Loads "books_selected4.csv" (target domain "book") and the Amazon music ratings into a
/// <c>CrossDomainDataContainer</c>, then evaluates a <c>LibFmTrainTester</c> for each number
/// of auxiliary ratings in { 0, 1, 2, 3, 5, 7, 10 } on a 0.25 split and prints a
/// tab-separated table of RMSE, MAE and duration in milliseconds.
/// </summary>
/// <remarks>
/// The dvd and video readers are constructed but their <c>LoadData</c> calls are disabled.
/// </remarks>
public void TestNewDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");
    //var splitter = new RatingSimpleSplitter(container, 0.25f);

    var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        LibFmTrainTester recommender;
        if (num == 0)
        {
            // No auxiliary ratings: use the recommender without a cross-domain feature builder.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // List<T>.Count property instead of the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads "books_selected4.csv" (target domain "book") and the Amazon music ratings into a
/// <c>CrossDomainDataContainer</c>, then performs ten runs; each run calls
/// <c>musicDomain.ActivateData(0.1f)</c>, evaluates a <c>LibFmTrainTester</c> whose
/// cross-domain feature builder is created with the value 10, and records RMSE, MAE and
/// duration. The results are printed as a tab-separated table.
/// </summary>
/// <remarks>
/// NOTE(review): the first column is printed under the header "NumAuxRatings" but contains
/// the run index -- confirm whether the header is intended.
/// The dvd and video readers are constructed but their <c>LoadData</c> calls are disabled.
/// </remarks>
public void TestAuxDataSize()
{
    // Number of evaluation runs; shared by the experiment loop and the report loop.
    const int numRuns = 10;

    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    for (int i = 0; i < numRuns; i++)
    {
        // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // NOTE(review): presumably activates a further 10% of the music data on each run -- confirm in Domain.ActivateData.
        musicDomain.ActivateData(0.1f);

        // step 2: recommender
        var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
        var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numRuns; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Amazon video train/test files (target domain "video") together with the music,
/// dvd and book ratings into a <c>CrossDomainDataContainer</c>, evaluates a
/// <c>LibFmTrainTester</c> for 0, 1, 2 and 3 auxiliary ratings, and prints a tab-separated
/// table of RMSE, MAE and duration in milliseconds.
/// </summary>
public void TestAmazonCrossDomainVideo()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book");
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video", true);

    var trainReader = new CsvReader(Paths.AmazonVideoTrain75, config, videoDomain);
    // NOTE(review): the trailing `true` presumably marks this reader as the test set -- confirm against CsvReader.
    var testReader = new CsvReader(Paths.AmazonVideoTest25, config, videoDomain, true);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);
    //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

    trainReader.LoadData(container);
    testReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    bookReader.LoadData(container);
    //tempReader.LoadData(container);

    //container.ShuffleDomains();
    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container);
    //var splitter = new RatingSimpleSplitter(container);

    var numAuxRatings = new int[] { 0, 1, 2, 3 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        ITrainTester<ItemRating> recommender;
        if (num == 0)
        {
            // No auxiliary ratings: use the recommender without a cross-domain feature builder.
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            var featureBuilder = new CrossDomainLibFmFeatureBuilder(videoDomain, num);
            //featureBuilder.LoadCachedUserData(_musicUsersPath);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(ctx["RMSE"].ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // Array.Length instead of the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Length; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Amazon book, music, dvd and video rating files into one <c>DataContainer</c>,
/// evaluates a <c>LibFmTrainTester</c> on a 0.25 split with the RMSE and MAE evaluators, and
/// prints the RMSE together with the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Timing starts after the data is loaded and the splitter has been created.
/// </remarks>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Alternative baseline: new MediaLiteRatingPredictor(new MatrixFactorization())
    var recommender = new LibFmTrainTester();

    // step3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.Elapsed.TotalMilliseconds);
}
/// <summary>
/// Evaluates LibFM on the Amazon "music" domain while the book, DVD and video
/// ratings are loaded into the same cross-domain container. The evaluation is
/// repeated for each value in {0, 1, 2, 3} and a tab-separated table with
/// RMSE, MAE and duration (milliseconds) per value is written to the console.
/// </summary>
/// <remarks>
/// For the value 0 the <c>LibFmTrainTester</c> is created without a feature builder;
/// for every other value a <c>CrossDomainLibFmFeatureBuilder</c> is created with the
/// music domain and that value (presumably the number of auxiliary-domain ratings
/// to use - confirm against the feature builder).
/// </remarks>
public void TestAmazonCrossDomainMusic()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    var bookDomain = new Domain("book");
    // NOTE(review): the 'true' flag presumably marks the target domain - confirm against Domain.
    var musicDomain = new Domain("music", true);
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var trainReader = new CsvReader(Paths.AmazonMusicTrain75, config, musicDomain);
    // NOTE(review): the trailing 'true' presumably marks this reader as the test set - confirm against CsvReader.
    var testReader = new CsvReader(Paths.AmazonMusicTest25, config, musicDomain, true);
    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config, bookDomain);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config, videoDomain);
    //var tempReader = new LibFmReader(_ecirTrain, _ecirTest) { MainDomain = bookDomain, AuxDomain = musicDomain, UserDataPath = _musicUsersPath };

    trainReader.LoadData(container);
    testReader.LoadData(container);
    bookReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);
    //tempReader.LoadData(container);

    //container.ShuffleDomains();
    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container);
    //var splitter = new RatingSimpleSplitter(container);

    var numAuxRatings = new List<int> { 0, 1, 2, 3 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic and high-resolution; DateTime.Now can jump with clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        ITrainTester<ItemRating> recommender;
        CrossDomainLibFmFeatureBuilder featureBuilder = null;

        if (num == 0)
        {
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            featureBuilder = new CrossDomainLibFmFeatureBuilder(musicDomain, num);
            //featureBuilder.LoadCachedUserData(_musicUsersPath);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(ctx["RMSE"].ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // List<T>.Count is the O(1) property; no need for the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Evaluates LibFM on the book ratings in "books_selected4.csv" with the Amazon
/// music ratings loaded into the same cross-domain container. The evaluation is
/// repeated for each value in {0, 1, 2, 3, 5, 7, 10} and a tab-separated table
/// with RMSE, MAE and duration (milliseconds) per value is written to the console.
/// </summary>
/// <remarks>
/// The DVD and video readers are constructed but their <c>LoadData</c> calls are
/// commented out, so only book and music data reach the container.
/// Unlike MAE, which is read from the evaluation context, RMSE is taken from
/// <c>recommender.RMSE</c>.
/// </remarks>
public void TestNewDataset()
{
    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the target domain - confirm against Domain.
    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();
    //musicDomain.CacheUserData();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");
    //return;
    //var splitter = new RatingSimpleSplitter(container, 0.25f);

    var numAuxRatings = new List<int> { 0, 1, 2, 3, 5, 7, 10 };
    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    foreach (var num in numAuxRatings)
    {
        // Stopwatch is monotonic and high-resolution; DateTime.Now can jump with clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // step 2: recommender
        LibFmTrainTester recommender;
        CrossDomainLibFmFeatureBuilder featureBuilder = null;

        if (num == 0)
        {
            recommender = new LibFmTrainTester(experimentId: num.ToString());
        }
        else
        {
            featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, num);
            recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
        }

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        //File.WriteAllLines("maps.txt", featureBuilder.Mapper.OriginalIDs.Zip(featureBuilder.Mapper.InternalIDs, (f, s) => f + "\t" + s));

        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    // List<T>.Count is the O(1) property; no need for the LINQ Count() extension.
    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", numAuxRatings[i], rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads MovieLens 1M into a <c>MovieLensCrossDomainContainer</c> created with a
/// single domain, selects "ml0" as target domain, evaluates a default
/// <c>LibFmTrainTester</c> on a <c>CrossDomainSimpleSplitter</c> created with 0.25f,
/// and prints RMSE and elapsed milliseconds.
/// </summary>
/// <remarks>
/// The MAE evaluator is added to the pipeline but its result is not printed.
/// The configured <c>MatrixFactorization</c> model is only consumed by the
/// commented-out <c>MediaLiteRatingPredictor</c> alternative.
/// </remarks>
public void TestMovieLensSingleDomain()
{
    int numDomains = 1;

    // load data
    var movieLensReader = new MovieLensCrossDomainReader(Paths.MovieLens1MMovies, Paths.MovieLens1M);
    var container = new MovieLensCrossDomainContainer(numDomains);
    movieLensReader.LoadData(container);

    // set target and active domains (the returned domain is not needed here)
    container.SpecifyTargetDomain("ml0");
    container.PrintStatistics();

    // Stopwatch is monotonic and high-resolution; DateTime.Now can jump with clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

    // recommender with non-CrossDomain feature builder
    var model = new MatrixFactorization();
    model.NumIter = 50;
    model.NumFactors = 8;
    model.Regularization = 0.1f;

    //var recommender = new MediaLiteRatingPredictor(model);
    var recommender = new LibFmTrainTester();

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());
    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.Elapsed.TotalMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);

    //container.CreateClusterFiles(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.raw", Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust.feat");
    //container.WriteClusters(Paths.MovieLens1M.GetDirectoryPath() + "\\movies.clust");
}
/// <summary>
/// Runs ten LibFM evaluations on the book ratings in "books_selected4.csv" with
/// Amazon music ratings as the second loaded domain. Before each run
/// <c>musicDomain.ActivateData(0.1f)</c> is called, and the feature builder is
/// always created with the value 10. A tab-separated table with the run index,
/// RMSE, MAE and duration (milliseconds) is written to the console.
/// </summary>
/// <remarks>
/// The DVD and video readers are constructed but their <c>LoadData</c> calls are
/// commented out. RMSE is taken from <c>recommender.RMSE</c>, MAE from the
/// evaluation context. The first printed column is the run index even though
/// the header labels it "NumAuxRatings".
/// </remarks>
public void TestAuxDataSize()
{
    // Number of evaluation runs; shared by the run loop and the print loop.
    const int numRuns = 10;

    // step 1: dataset
    var config = new CsvConfiguration() { Delimiter = ",", HasHeaderRecord = true };
    var container = new CrossDomainDataContainer();

    // NOTE(review): the 'true' flag presumably marks the target domain - confirm against Domain.
    var bookDomain = new Domain("book", true);
    var musicDomain = new Domain("music");
    var dvdDomain = new Domain("dvd");
    var videoDomain = new Domain("video");

    var bookReader = new CsvReader("books_selected4.csv", config, bookDomain);
    //var trainReader = new CsvReader("books_selected1_train.csv", config, bookDomain);
    //var testReader = new CsvReader("books_selected1_test.csv", config, bookDomain, true);
    var musicReader = new CsvReader(Paths.AmazonAllMusicRatings, config, musicDomain);
    var dvdReader = new CsvReader(Paths.AmazonAllDvdRatings, config, dvdDomain);
    var videoReader = new CsvReader(Paths.AmazonAllVideoRatings, config, videoDomain);

    bookReader.LoadData(container);
    //trainReader.LoadData(container);
    //testReader.LoadData(container);
    musicReader.LoadData(container);
    //dvdReader.LoadData(container);
    //videoReader.LoadData(container);

    container.PrintStatistics();

    var splitter = new CrossDomainSimpleSplitter(container, 0.25f);
    //splitter.SaveSplitsAsCsv("books_selected1_train.csv", "books_selected1_test.csv");

    var rmse = new List<string>();
    var mae = new List<string>();
    var durations = new List<string>();

    for (int i = 0; i < numRuns; i++)
    {
        // Stopwatch is monotonic and high-resolution; DateTime.Now can jump with clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // NOTE(review): presumably activates a further 10% of the music data on each run - confirm against Domain.ActivateData.
        musicDomain.ActivateData(0.1f);

        // step 2: recommender
        var featureBuilder = new CrossDomainLibFmFeatureBuilder(bookDomain, 10);
        var recommender = new LibFmTrainTester(experimentId: i.ToString(), featureBuilder: featureBuilder);

        // step3: evaluation
        var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
        var ep = new EvaluationPipeline<ItemRating>(ctx);
        ep.Evaluators.Add(new RMSE());
        ep.Evaluators.Add(new MAE());
        ep.Run();

        rmse.Add(recommender.RMSE.ToString());
        mae.Add(ctx["MAE"].ToString());

        stopwatch.Stop();
        durations.Add(((int)stopwatch.Elapsed.TotalMilliseconds).ToString());
    }

    Console.WriteLine("NumAuxRatings\tRMSE\tMAE\tDuration");

    for (int i = 0; i < numRuns; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", i, rmse[i], mae[i], durations[i]);
    }
}
/// <summary>
/// Loads the Epinions cross-domain data, removes the "ep0" domain, and then
/// treats every remaining domain in turn as the target domain. For each target
/// and each value in {0, 1, 2, 3, 4} a LibFM evaluation is run. Finally an RMSE
/// table, per-domain user/item/rating counts, and a timing table (milliseconds)
/// are written to the console, all tab-separated.
/// </summary>
/// <param name="numDomains">
/// Number of domains: passed to <c>EpinionsCrossDomainDataContainer</c> and used
/// to size the result arrays and the printed column headers.
/// </param>
/// <remarks>
/// The result arrays have exactly <paramref name="numDomains"/> columns, so the
/// container is assumed to hold at most that many domains once "ep0" is removed;
/// additional domains would index past the end of the arrays.
/// RMSE values are read from <c>recommender.RMSE</c>.
/// </remarks>
public void TestEpinionAllDomains(int numDomains = 3)
{
    var numAuxRatings = new List<int> { 0, 1, 2, 3, 4 };

    var epinionsReader = new EpinionsCrossDomainReader(Paths.EpinionRoot + "Epinions RED");
    //var domainPaths = Enumerable.Range(1, numDomains)
    //    .Select(i => string.Format("{0}Epinions RED\\Domains{1}-{2}.csv", Paths.EpinionRoot, numDomains, i)).ToArray();
    //var epinionsReader = new EpinionsCrossDomainReader(domainPaths);

    var container = new EpinionsCrossDomainDataContainer(numDomains);
    epinionsReader.LoadData(container);
    container.Domains.Remove("ep0");

    // Rows: entries of numAuxRatings; columns: domains in enumeration order.
    double[,] rmseMatrix = new double[numAuxRatings.Count, numDomains];
    int[,] durationsMatrix = new int[numAuxRatings.Count, numDomains];

    int[] numUsers = new int[numDomains];
    int[] numItems = new int[numDomains];
    int[] numRatings = new int[numDomains];

    int domainIndex = 0;

    foreach (Domain d in container.Domains.Values)
    {
        var targetDomain = container.SpecifyTargetDomain(d.Id);
        Console.WriteLine("Target domain: {0}", d.ToString());

        var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

        int numAuxIndex = 0;

        foreach (var num in numAuxRatings)
        {
            // Stopwatch is monotonic and high-resolution; DateTime.Now can jump with clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            LibFmTrainTester recommender;
            CrossDomainLibFmFeatureBuilder featureBuilder = null;

            if (num == 0)
            {
                recommender = new LibFmTrainTester(experimentId: num.ToString());
            }
            else
            {
                featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain, num);
                recommender = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
            }

            var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
            var ep = new EvaluationPipeline<ItemRating>(ctx);
            ep.Evaluators.Add(new RMSE());
            ep.Run();

            stopwatch.Stop();

            rmseMatrix[numAuxIndex, domainIndex] = recommender.RMSE;
            durationsMatrix[numAuxIndex, domainIndex] = (int)stopwatch.Elapsed.TotalMilliseconds;
            numAuxIndex++;
        }

        numUsers[domainIndex] = d.Ratings.Select(r => r.User.Id).Distinct().Count();
        numItems[domainIndex] = d.Ratings.Select(r => r.Item.Id).Distinct().Count();
        numRatings[domainIndex] = d.Ratings.Count;

        domainIndex++;
    }

    // Write RMSEs
    Console.WriteLine("\nRMSEs:\n");

    // string.Join (unlike a seedless Aggregate) does not throw on an empty
    // sequence and avoids repeated string concatenation.
    string header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "D" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);

        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + rmseMatrix[i, j]);
        }

        Console.WriteLine();
    }

    // Write domain statistics
    string users = string.Join("\t", numUsers.Select(c => c.ToString()));
    string items = string.Join("\t", numItems.Select(c => c.ToString()));
    string ratings = string.Join("\t", numRatings.Select(c => c.ToString()));

    Console.WriteLine();
    Console.WriteLine("Num Users\t" + users);
    Console.WriteLine("Num Items\t" + items);
    Console.WriteLine("Num Ratings\t" + ratings);

    // Write times
    Console.WriteLine("\nTimes:\n");

    header = string.Join("\t", Enumerable.Range(1, numDomains).Select(n => "T" + n));
    Console.WriteLine("Num aux. ratings\t" + header);

    for (int i = 0; i < numAuxRatings.Count; i++)
    {
        Console.Write(numAuxRatings[i]);

        for (int j = 0; j < numDomains; j++)
        {
            Console.Write("\t" + durationsMatrix[i, j]);
        }

        Console.WriteLine();
    }

    Console.WriteLine("\n");
}