/// <summary>
/// Loads the saved user list, splits it into a train set and a test set,
/// saves both sets, and then runs the rating save and preprocessing steps
/// for each set in turn.
/// </summary>
/// <remarks>
/// Every stage prints a progress message followed by the current local time.
/// A garbage collection is forced after each rating stage.
/// </remarks>
public static void GenerateSplitTestAndTrainDataFiles() {
    // Every progress message carries a "{0}" placeholder for the current time.
    Action<string> report = format => Console.WriteLine(format, DateTime.Now.ToLongTimeString());

    // split
    report("Loading all users... {0}");
    var allUsers = UserProvider.Load(DataFiles.Users);

    report("Spliting users into train and test sets... {0}");
    List<IUser> trainSet;
    List<IUser> testSet;
    allUsers.SplitIntoTrainAndTest(out trainSet, out testSet);
    trainSet.Sort();
    testSet.Sort();

    report("Saving train users... {0}");
    UserProvider.Save(DataFiles.TrainUsers, trainSet);

    report("Saving test users... {0}");
    UserProvider.Save(DataFiles.TestUsers, testSet);

    // The counts are captured before the rating stages receive the lists.
    var trainSetSize = trainSet.Count;
    var testSetSize = testSet.Count;

    report("Loading artists... {0}");
    var artistLookup = ArtistProvider.Load(DataFiles.Artists).GetLookupTable();

    // train dataset
    report("Saving and reloading train ratings... {0}");
    LoadAndSaveRatings(DataFiles.TrainPlaycounts, trainSet, artistLookup);
    GC.Collect();

    report("Preprocessing train ratings... {0}");
    PreprocessTrainRatings(trainSetSize);

    // test dataset
    report("Saving and reloading test ratings... {0}");
    LoadAndSaveRatings(DataFiles.TestPlaycounts, testSet, artistLookup);
    GC.Collect();

    report("Preprocessing test ratings... {0}");
    PreprocessTestRatings(testSetSize);
    GC.Collect();
}
/// <summary>
/// Imports users, artists and ratings from the raw dataset files, saves each
/// of them to its data file, and reports how many users were imported.
/// </summary>
/// <remarks>
/// Every stage prints a progress message followed by the current local time.
/// The ratings are first attached to the users and then extracted back from
/// them before being saved.
/// </remarks>
/// <returns>The <c>Count</c> of the imported user collection.</returns>
private static int LoadFromDataset() {
    // Every progress message carries a "{0}" placeholder for the current time.
    Action<string> report = format => Console.WriteLine(format, DateTime.Now.ToLongTimeString());

    List<string> userLookup;
    List<string> artistLookup;

    report("Loading users from dataset... {0}");
    var importedUsers = UserProvider.ImportFromDataset(DataFiles.UserDataset, out userLookup);
    UserProvider.Save(DataFiles.Users, importedUsers);
    GC.Collect();

    report("Loading artists from dataset... {0}");
    var importedArtists = ArtistProvider.ImportFromDataset(DataFiles.RatingDataset, out artistLookup);
    ArtistProvider.Save(DataFiles.Artists, importedArtists);

    // Both lookup lists produced by the imports above are passed to the rating import.
    report("Loading ratings from dataset... {0}");
    var playcounts = RatingProvider.ImportFromDataset(DataFiles.RatingDataset, userLookup, artistLookup);

    report("Populating users with ratings... {0}");
    importedUsers.PopulateWithRatings(playcounts, true);
    GC.Collect();

    report("Extracting ratings from users... {0}");
    playcounts.ExtractFromUsers(importedUsers, false);
    RatingProvider.Save(DataFiles.Playcounts, playcounts);

    return importedUsers.Count;
}