/// <summary>
/// Imports the MovieTweetings training and test files into <paramref name="container"/>.
/// </summary>
/// <remarks>
/// The first line of each file is skipped. Every remaining line is split on commas: fields 0 and 1 are passed
/// through unchanged, field 2 is parsed as the rating, field 3 is ignored, and everything from field 4 onwards
/// is re-joined with commas and passed on as the tweet JSON (the JSON itself may contain commas).
/// Records from <c>_trainSet</c> are added with the test flag <c>false</c>, records from <c>_testSet</c>
/// with <c>true</c>.
/// </remarks>
/// <param name="container">Must be a <c>MovieTweetingsDataContainer</c>.</param>
/// <exception cref="Exception">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    if (!(container is MovieTweetingsDataContainer))
    {
        throw new Exception("The data container should have type MovieTweetingsDataContainer.");
    }

    var mtContainer = (MovieTweetingsDataContainer)container;

    Console.WriteLine("Importing training set...");
    ImportMovieTweetings(mtContainer, _trainSet, false);

    Console.WriteLine("Importing test set...");
    ImportMovieTweetings(mtContainer, _testSet, true);
}

/// <summary>
/// Reads one MovieTweetings file (skipping its first line) and adds every record to <paramref name="target"/>.
/// </summary>
private static void ImportMovieTweetings(MovieTweetingsDataContainer target, string path, bool isTest)
{
    foreach (string line in File.ReadAllLines(path).Skip(1))
    {
        var tokens = line.Split(',');

        // string.Join yields an empty string when the row has no fifth field, where the former
        // Aggregate call threw InvalidOperationException; it also avoids repeated concatenation.
        string tweetJson = string.Join(",", tokens.Skip(4));

        // The rating is machine-written data, so parse it independently of the current UI culture
        // (otherwise "4.5" is misread or rejected on cultures that use ',' as the decimal separator).
        float rating = float.Parse(tokens[2], System.Globalization.CultureInfo.InvariantCulture);

        target.AddMovieTweeting(tokens[0], tokens[1], rating, tweetJson, isTest);
    }
}
/// <summary>
/// Loads user listening sessions from the file at <c>_path</c> into <paramref name="container"/>.
/// </summary>
/// <remarks>
/// Each line is split on commas. Field 0 is the user id, field 1 is ignored, and the remainder of the line is a
/// list of tracks in which every track is terminated by '|' (the fragment after the last '|' is discarded).
/// A track's comma-separated fields are read from the end: the third-to-last field is used as the item id and
/// the second-to-last field, divided by 120, as the rating.
/// NOTE(review): the meaning of the constant 120 is not visible here - confirm against the dataset description.
/// </remarks>
/// <param name="container">Must be a <c>MusicDataContainer</c>.</param>
/// <exception cref="Exception">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    if (!(container is MusicDataContainer))
    {
        throw new Exception("The data container should have type MusicDataContainer.");
    }

    var mContainer = (MusicDataContainer)container;

    foreach (string line in File.ReadAllLines(_path))
    {
        var tokens = line.Split(',');
        var us = new UserSession() { User = mContainer.AddUser(tokens[0]) };

        // Re-join everything after the second comma. string.Join returns "" for rows without a track list
        // (such rows now produce an empty session), where the former Aggregate call threw.
        var tracks = string.Join(",", tokens.Skip(2)).Split('|');

        foreach (string track in tracks.Take(tracks.Length - 1))
        {
            // Reversed so that the trailing fields get fixed indices regardless of how many
            // leading fields a track has.
            var parts = track.Split(',').Reverse().ToList();

            // Alternative kept from the original: mContainer.AddPositiveFeedback(tokens[0], parts[0]);
            // Parsed with the invariant culture because the file is machine-written data.
            float rating = float.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture) / 120;
            mContainer.AddRating(tokens[0], parts[2], rating);

            var item = mContainer.AddItem(parts[2]);
            us.Items.Add(item);
        }

        mContainer.Sessions.Add(us);
    }
}
/// <summary>
/// Loads the train/test files referenced by <c>Paths.MovieLens1MTrain75</c> and <c>Paths.MovieLens1MTest25</c>
/// into one container, evaluates a <c>LibFmTrainTester</c> with RMSE and prints the score and the elapsed
/// time in milliseconds.
/// </summary>
/// <remarks>
/// The measured time covers splitter construction, recommender construction and the pipeline run; data loading
/// is excluded.
/// </remarks>
public void TestMovieLensSingle()
{
    // step 1: dataset
    var config = new CsvConfiguration()
    {
        Delimiter = "::",
        HasHeaderRecord = true
    };

    // load data
    var trainReader = new CsvReader(Paths.MovieLens1MTrain75, config);
    // The third argument presumably marks the reader as a test-set reader - confirm against CsvReader.
    var testReader = new CsvReader(Paths.MovieLens1MTest25, config, true);

    var container = new DataContainer();
    trainReader.LoadData(container);
    testReader.LoadData(container);

    // Stopwatch is monotonic; DateTime.Now is wall-clock time and can jump (clock sync, DST),
    // which makes it unsuitable for measuring durations.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var splitter = new RatingSimpleSplitter(container);

    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());
    var recommender = new LibFmTrainTester(libFmPath: "LibFm.Net.64.exe");

    // evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());

    ep.Run();

    stopwatch.Stop();
    var duration = (int)stopwatch.ElapsedMilliseconds;

    Console.WriteLine("RMSE\tDuration\n{0}\t{1}", ctx["RMSE"], duration);
}
/// <summary>
/// Loads movies and ratings from "::"-delimited files into a <c>MovieLensCrossDomainContainer</c>.
/// </summary>
/// <remarks>
/// Movies (<c>MoviesPath</c>): every line is read (no line is skipped); field 0 is the item id and field 2 is
/// stored in the item's "genres" property. After the movies are loaded, <c>CreateDominantGenre</c> is called
/// with "DominantGenre.csv" in the directory of <c>MoviesPath</c>.
/// Ratings (<c>RatingsPath</c>): the first line is skipped; fields 0-2 are user id, item id and rating, and
/// field 3 is stored in the rating's "timestamp" property. All ratings are added with the test flag
/// <c>false</c>.
/// Lines are split with <c>RemoveEmptyEntries</c>, so an empty field shifts the indices of the fields after it.
/// </remarks>
/// <param name="container">Must be a <c>MovieLensCrossDomainContainer</c>.</param>
/// <exception cref="WrapRecException">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    if (!(container is MovieLensCrossDomainContainer))
    {
        throw new WrapRecException("The data container should have type MovieLensCrossDomainContainer.");
    }

    var mlContainer = (MovieLensCrossDomainContainer)container;
    var separator = new string[] { "::" };

    Console.WriteLine("Reading movies...");
    foreach (string l in File.ReadAllLines(MoviesPath))
    {
        var parts = l.Split(separator, StringSplitOptions.RemoveEmptyEntries);
        var item = mlContainer.AddItem(parts[0]);
        item.Properties["genres"] = parts[2];
    }

    Console.WriteLine("Creating domains...");
    //mlContainer.CreateItemClusters(MoviesPath.GetDirectoryPath() + "\\GenreClusters.csv");
    mlContainer.CreateDominantGenre(MoviesPath.GetDirectoryPath() + "\\DominantGenre.csv");

    Console.WriteLine("Reading ratings...");
    foreach (string l in File.ReadAllLines(RatingsPath).Skip(1))
    {
        var parts = l.Split(separator, StringSplitOptions.RemoveEmptyEntries);

        // Ratings are machine-written, so parse them independently of the current UI culture.
        float rating = float.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);

        var ir = mlContainer.AddRating(parts[0], parts[1], rating, false);
        ir.Properties["timestamp"] = parts[3];
    }

    //mlContainer.CreateDomainsBasedOnDate();
    //mlContainer.CreateDomainsWithEvenlyDistributedUsers();
}
/// <summary>
/// Loads Epinions data into an <c>EpinionsCrossDomainDataContainer</c>.
/// </summary>
/// <remarks>
/// Delegates to <c>DomainsLoad</c> when <c>FolderPath</c> is null or empty, and to <c>NormalRead</c> otherwise.
/// </remarks>
/// <param name="container">Must be an <c>EpinionsCrossDomainDataContainer</c>.</param>
/// <exception cref="WrapRecException">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    var ecdContainer = container as EpinionsCrossDomainDataContainer;
    if ((object)ecdContainer == null)
    {
        throw new WrapRecException("The data container should have type EpinionsCrossDomainDataContainer.");
    }

    bool folderGiven = !string.IsNullOrEmpty(FolderPath);
    if (folderGiven)
    {
        NormalRead(ecdContainer);
        return;
    }

    DomainsLoad(ecdContainer);
}
/// <summary>
/// Loads CrowdRec data into <paramref name="container"/>: entities first, then relations.
/// </summary>
/// <param name="container">Must be a <c>CrowdRecDataContainer</c>.</param>
/// <exception cref="Exception">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    var crContainer = container as CrowdRecDataContainer;
    if ((object)crContainer == null)
    {
        throw new Exception("The data container should have type CrowdRecDataContainer.");
    }

    // Entities are loaded before the relations, as in the original ordering.
    LoadEntities(crContainer);
    LoadRelations(crContainer);
}
/// <summary>
/// Reads every remaining record from <c>_reader</c> and adds it to <paramref name="container"/> as a rating.
/// </summary>
/// <remarks>
/// Field 0 is the user id, field 1 the item id and field 2 the rating. Each rating is added with
/// <c>IsTestReader</c> as its test flag. When <c>Domain</c> is set, the container is cast to
/// <c>CrossDomainDataContainer</c> and its <c>CurrentDomain</c> is set before any rating is added.
/// </remarks>
/// <param name="container">
/// Target container; must be a <c>CrossDomainDataContainer</c> when <c>Domain</c> is not null.
/// </param>
/// <exception cref="InvalidCastException">
/// Thrown when <c>Domain</c> is set and <paramref name="container"/> is not a <c>CrossDomainDataContainer</c>.
/// </exception>
public void LoadData(DataContainer container)
{
    if (Domain != null)
    {
        ((CrossDomainDataContainer)container).CurrentDomain = Domain;
    }

    while (_reader.Read())
    {
        // userId, itemId, rating
        // The rating is parsed with the invariant culture so that "4.5" is read correctly
        // on machines whose UI culture uses ',' as the decimal separator.
        float rating = float.Parse(_reader.GetField(2), System.Globalization.CultureInfo.InvariantCulture);
        container.AddRating(_reader.GetField(0), _reader.GetField(1), rating, IsTestReader);
    }
}
/// <summary>
/// Loads main-domain and auxiliary-domain ratings from <c>TrainFile</c> and <c>TestFile</c> into a
/// <c>CrossDomainDataContainer</c>.
/// </summary>
/// <remarks>
/// Each line is space-separated: field 0 is the rating, fields 1 and 2 are "userId:..." and "itemId:...", and
/// every further field is "auxItemId:auxRating". The main rating is added under <c>MainDomain</c> (test flag
/// <c>false</c> for the train file, <c>true</c> for the test file); auxiliary ratings are added under
/// <c>AuxDomain</c>, always with test flag <c>false</c>.
/// The file at <c>UserDataPath</c> is no longer read: the dictionary formerly built from it was never used.
/// </remarks>
/// <param name="container">Must be a <c>CrossDomainDataContainer</c>.</param>
/// <exception cref="InvalidCastException">Thrown when <paramref name="container"/> has a different type.</exception>
public void LoadData(DataContainer container)
{
    var cdContainer = (CrossDomainDataContainer)container;

    // NOTE(review): auxiliary ratings are multiplied by 10 for the train file but left unscaled for the
    // test file. This asymmetry is preserved from the original code; confirm whether it is intentional.
    LoadRatingLines(cdContainer, TrainFile, false, 10f);
    LoadRatingLines(cdContainer, TestFile, true, 1f);
}

/// <summary>
/// Adds the main rating and the auxiliary ratings of every line in <paramref name="path"/>; auxiliary
/// ratings are multiplied by <paramref name="auxScale"/>.
/// </summary>
private void LoadRatingLines(CrossDomainDataContainer cdContainer, string path, bool isTest, float auxScale)
{
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    foreach (var line in File.ReadAllLines(path))
    {
        var parts = line.Split(' ');

        // Parsed with the invariant culture: the files are machine-written data.
        float rating = float.Parse(parts[0], culture);
        string userId = parts[1].Split(':')[0];
        string itemId = parts[2].Split(':')[0];

        cdContainer.CurrentDomain = MainDomain;
        cdContainer.AddRating(userId, itemId, rating, isTest);

        // Everything after the third field belongs to the auxiliary domain and is never a test rating.
        cdContainer.CurrentDomain = AuxDomain;
        for (int i = 3; i < parts.Length; i++)
        {
            var itemRating = parts[i].Split(':');
            cdContainer.AddRating(userId, itemRating[0], float.Parse(itemRating[1], culture) * auxScale, false);
        }
    }
}
/// <summary>
/// Loads rating data through every reader in <c>EpinionsReaders</c> and then attaches the relations
/// found in <c>RelationsPath</c> to the users already present in <paramref name="container"/>.
/// </summary>
/// <remarks>
/// The first line of the relations file is skipped. Each remaining line is stripped of leading spaces and
/// split on tabs: field 0 (with "u" appended) is the user id, field 1 the connection id and field 2 the
/// strength. For users that exist in the container, "connectionId strength" is added to the "Connections"
/// property; relations of unknown users are ignored.
/// </remarks>
/// <param name="container">Container that receives the ratings and the relation properties.</param>
public void LoadData(DataContainer container)
{
    // Standard rating data first.
    foreach (var ratingReader in EpinionsReaders)
    {
        ratingReader.LoadData(container);
    }

    // Relation-specific data; index 0 is skipped.
    string[] relationLines = File.ReadAllLines(RelationsPath);
    for (int i = 1; i < relationLines.Length; i++)
    {
        string[] fields = relationLines[i].TrimStart(' ').Split('\t');

        // User ids carry a "u" suffix once loaded into the container, so the same suffix is applied here.
        string userKey = fields[0] + "u";
        string connectionId = fields[1];
        string strength = fields[2];

        if (!container.Users.ContainsKey(userKey))
        {
            continue;
        }

        container.Users[userKey].AddProperty("Connections", connectionId + " " + strength);
    }
}
/// <summary>
/// Creates the feature builder and, unless connection strength is used, counts the connections of all users.
/// </summary>
/// <remarks>
/// When <paramref name="useConnectionStrength"/> is <c>true</c> the frequency table <c>_connectionsFreq</c>
/// is not created. Otherwise each user's "Connections" property is split on ',' and every entry is passed
/// to <c>AddConnectionCount</c>.
/// </remarks>
/// <param name="container">Container whose users are scanned for a "Connections" property.</param>
/// <param name="maxConnections">Stored in <c>MaxConnections</c>.</param>
/// <param name="useConnectionStrength">Stored in <c>UseConnectionStrength</c>.</param>
public TrustAwareLibFmFeatureBuilder(DataContainer container, int maxConnections, bool useConnectionStrength)
{
    MaxConnections = maxConnections;
    UseConnectionStrength = useConnectionStrength;

    if (!useConnectionStrength)
    {
        _connectionsFreq = new Dictionary<string, int>();

        foreach (var user in container.Users.Values)
        {
            string rawConnections = user.GetProperty("Connections");
            if (string.IsNullOrEmpty(rawConnections))
            {
                continue;
            }

            string[] entries = rawConnections.Split(',');
            for (int i = 0; i < entries.Length; i++)
            {
                AddConnectionCount(entries[i]);
            }
        }
    }
}
/// <summary>
/// Loads the files referenced by <c>Paths.AmazonBooksTrain75</c> and <c>Paths.AmazonBooksTest25</c> into
/// separate containers and evaluates a default <c>LibFmTrainTester</c> with RMSE and MAE.
/// </summary>
public void TestAmazonDatasetSingleNewModel()
{
    // step 1: dataset
    var config = new CsvConfiguration();
    config.Delimiter = ",";
    config.HasHeaderRecord = true;

    var trainContainer = new DataContainer();
    var testContainer = new DataContainer();

    var trainReader = new CsvReader(Paths.AmazonBooksTrain75, config);
    var testReader = new CsvReader(Paths.AmazonBooksTest25, config);

    trainReader.LoadData(trainContainer);
    testReader.LoadData(testContainer);

    var dataset = new ItemRatingDataset(trainContainer, testContainer);

    //var featureBuilder = new LibFmFeatureBuilder();

    // step 2: recommender
    var recommender = new LibFmTrainTester();

    // step 3: evaluation
    var context = new EvalutationContext<ItemRating>(recommender, dataset);
    var pipeline = new EvaluationPipeline<ItemRating>(context);
    pipeline.Evaluators.Add(new RMSE());
    pipeline.Evaluators.Add(new MAE());

    pipeline.Run();
}
/// <summary>
/// Loads the book, music, DVD and video rating files referenced by <c>Paths</c> into one container, evaluates a
/// default <c>LibFmTrainTester</c> with RMSE and MAE, and prints the RMSE and the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// The measured time covers recommender construction and the pipeline run; data loading and splitter
/// construction are excluded.
/// </remarks>
public void TestAmazonAllDomains2()
{
    // step 1: dataset
    var config = new CsvConfiguration()
    {
        Delimiter = ",",
        HasHeaderRecord = true
    };

    var container = new DataContainer();

    var bookReader = new CsvReader(Paths.AmazonBooksRatings, config);
    var musicReader = new CsvReader(Paths.AmazonMusicRatings, config);
    var dvdReader = new CsvReader(Paths.AmazonDvdRatings, config);
    var videoReader = new CsvReader(Paths.AmazonVideoRatings, config);

    bookReader.LoadData(container);
    musicReader.LoadData(container);
    dvdReader.LoadData(container);
    videoReader.LoadData(container);

    // The second argument is presumably the test fraction - confirm against RatingSimpleSplitter.
    var splitter = new RatingSimpleSplitter(container, 0.25f);

    // Stopwatch is monotonic; DateTime.Now is wall-clock time and can jump (clock sync, DST),
    // which makes it unsuitable for measuring durations.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var recommender = new LibFmTrainTester();
    //var recommender = new MediaLiteRatingPredictor(new MatrixFactorization());

    // step 3: evaluation
    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
    var ep = new EvaluationPipeline<ItemRating>(ctx);
    ep.Evaluators.Add(new RMSE());
    ep.Evaluators.Add(new MAE());

    ep.Run();

    stopwatch.Stop();

    Console.WriteLine("RMSE\t{0}\nDuration\t{1}", ctx["RMSE"], (int)stopwatch.ElapsedMilliseconds);
}
/// <summary>
/// Splits the ratings of <paramref name="container"/> by their <c>IsTest</c> flag.
/// </summary>
/// <remarks>
/// <c>Train</c> receives the ratings whose flag is <c>false</c>, <c>Test</c> those whose flag is <c>true</c>.
/// Both are filters over <c>container.Ratings</c> and are not materialised here.
/// </remarks>
/// <param name="container">Container whose ratings are split.</param>
public RatingSimpleSplitter(DataContainer container)
{
    Train = from rating in container.Ratings
            where rating.IsTest == false
            select rating;

    Test = from rating in container.Ratings
           where rating.IsTest == true
           select rating;
}