/// <summary>
/// Draws a random sample of users from <paramref name="all_data"/> and collects every
/// (user, item, time) entry belonging to a sampled user. The collected entries are handed
/// to <c>create_data</c>, and statistics plus a timestamp are written to the console.
/// </summary>
/// <param name="all_data">Source data; read through AllUsers, Users, Items and Times.</param>
/// <param name="sub_all_data">Forwarded by reference to <c>create_data</c> together with the sampled entries.</param>
private static void getSubset(ITimedRatings all_data, ref ITimedRatings sub_all_data) {
    List<int> sub_list_item = new List<int>();
    List<DateTime> sub_list_time = new List<DateTime>();
    List<int> sub_list_user = new List<int>();

    System.Random gen = new System.Random();
    IList<int> all_users = all_data.AllUsers;

    // Hash set so that the membership test in the entry loop below is O(1)
    // instead of a linear scan per entry.
    HashSet<int> users = new HashSet<int>();
    for (int i = 0; i < all_users.Count; i++) {
        // gen.Next(100) yields 0..99, so "<= 2" keeps a user with probability 3/100.
        int rnd = gen.Next(100);
        if (rnd <= 2) {
            users.Add(all_users[i]);
        }
    }

    // Users, Items and Times are read with the same index i for each entry.
    for (int i = 0; i < all_data.Users.Count; i++) {
        int user = all_data.Users[i];
        if (users.Contains(user)) {
            sub_list_item.Add(all_data.Items[i]);
            sub_list_time.Add(all_data.Times[i]);
            sub_list_user.Add(user);
        }
    }

    create_data(sub_list_user, sub_list_item, sub_list_time, ref sub_all_data);

    Console.Write(sub_all_data.Statistics());
    Console.WriteLine("finished creating subset:");
    Console.WriteLine(DateTime.Now);
}
/// <summary>
/// Prunes <paramref name="all_data"/> in two passes: first every item whose entry in
/// <c>getTimesItemDict()</c> has fewer than 20 elements is removed, then every user whose
/// entry in <c>getItemsUserDict()</c> has fewer than 20 elements is removed. Afterwards the
/// data set statistics and a completion message are written to the console.
/// </summary>
/// <param name="all_data">Data set to prune; modified through RemoveItem and RemoveUser.</param>
public static void removeUserThreshold(ref ITimedRatings all_data) {
    const int min_count = 20;

    // Pass 1: items. Candidates are collected first and removed in a second loop,
    // presumably so AllItems is not modified while it is being enumerated.
    Dictionary<int, IList<DateTime>> times_by_item = all_data.getTimesItemDict();
    List<int> sparse_items = new List<int>();
    foreach (int item_id in all_data.AllItems) {
        if (times_by_item[item_id].Count >= min_count) {
            continue;
        }
        sparse_items.Add(item_id);
    }
    foreach (int item_id in sparse_items) {
        all_data.RemoveItem(item_id);
    }

    // Pass 2: users. The per-user dictionary is fetched only after the item removals above.
    Dictionary<int, IList<int>> items_by_user = all_data.getItemsUserDict();
    List<int> sparse_users = new List<int>();
    foreach (int user_id in all_data.AllUsers) {
        if (items_by_user[user_id].Count >= min_count) {
            continue;
        }
        sparse_users.Add(user_id);
    }
    foreach (int user_id in sparse_users) {
        all_data.RemoveUser(user_id);
    }

    Console.Write(all_data.Statistics());
    Console.Write("Finished removing thresholds");
}
// private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings validation_data, ref ITimedRatings test_data, ref ITimedRatings training_data){
//
//
//     Console.WriteLine (all_data.Statistics ());
//
//     List<int> validation_list_item = new List<int>();
//     List<int> test_list_item = new List<int>();
//     List<int> training_list_item = new List<int>();
//
//     List<DateTime> validation_list_time = new List<DateTime>();
//     List<DateTime> test_list_time = new List<DateTime>();
//     List<DateTime> training_list_time = new List<DateTime>();
//
//     List<int> validation_list_user = new List<int>();
//     List<int> test_list_user = new List<int>();
//     List<int> training_list_user = new List<int>();
//
//     System.Random gen = new System.Random();
//     for (int i = 0; i < all_data.Users.Count; i++) {
//         int rnd = gen.Next(100);
//         if(rnd <= 10){
//             validation_list_item.Add(all_data.Items[i]);
//             validation_list_time.Add(all_data.Times[i]);
//             validation_list_user.Add(all_data.Users[i]);
//         }else if(rnd <=30){
//             test_list_item.Add(all_data.Items[i]);
//             test_list_time.Add(all_data.Times[i]);
//             test_list_user.Add(all_data.Users[i]);
//         }else{
//             training_list_item.Add(all_data.Items[i]);
//             training_list_time.Add(all_data.Times[i]);
//             training_list_user.Add(all_data.Users[i]);
//         }
//     }
//     create_data(validation_list_user, validation_list_item, validation_list_time, ref validation_data);
//     create_data(test_list_user, test_list_item, test_list_time, ref test_data);
//     create_data(training_list_user, training_list_item, training_list_time, ref training_data);
//
//
//
//     Console.Write(validation_data.Statistics ());
//     Console.Write(test_data.Statistics ());
//     Console.Write(training_data.Statistics());
//     Console.WriteLine ("finished creating datasets:");
//     Console.WriteLine (DateTime.Now);
// }
//

/// <summary>
/// Splits every user's entries into three consecutive, non-overlapping slices, taken in the
/// order in which the per-user lists are returned by <c>getTimesUserDict()</c> and
/// <c>getItemsUserDict()</c>: the first 70% (rounded down) go to training, the next 20%
/// (rounded down) to test, and the remaining entries to validation. Each slice is passed to
/// <c>create_data</c>; statistics and a timestamp are then written to the console.
/// </summary>
/// <param name="all_data">Source data; read through AllUsers and the two per-user dictionaries.</param>
/// <param name="test_data">Forwarded by reference to <c>create_data</c> with each user's test slice.</param>
/// <param name="training_data">Forwarded by reference to <c>create_data</c> with each user's training slice.</param>
/// <param name="validation_data">Forwarded by reference to <c>create_data</c> with each user's validation slice.</param>
private static void readAndSplitData(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data, ref ITimedRatings validation_data) {
    Dictionary<int, IList<DateTime>> user_times = all_data.getTimesUserDict();
    Dictionary<int, IList<int>> user_items = all_data.getItemsUserDict();

    foreach (int user_id in all_data.AllUsers) {
        // GetRange is only available on List<T>, hence the casts.
        // NOTE(review): assumes the dictionaries actually hold List<T> instances and that
        // a user's items and times lists have the same length — confirm against the producer.
        List<DateTime> timesOfUser = (List<DateTime>)user_times[user_id];
        List<int> itemsOfUser = (List<int>)user_items[user_id];

        int amountCheckIns = timesOfUser.Count;
        int training = (int)(amountCheckIns * 0.7);
        int test = (int)(amountCheckIns * 0.2);
        // Validation takes whatever is left, so rounding losses end up here.
        int validation = amountCheckIns - (training + test);

        List<int> training_list_item = itemsOfUser.GetRange(0, training);
        // The test slice is exactly `test` entries long; letting it run to the end of the
        // list would make it contain the whole validation slice as well.
        List<int> test_list_item = itemsOfUser.GetRange(training, test);
        List<int> validation_list_item = itemsOfUser.GetRange(training + test, validation);

        List<DateTime> training_list_time = timesOfUser.GetRange(0, training);
        List<DateTime> test_list_time = timesOfUser.GetRange(training, test);
        List<DateTime> validation_list_time = timesOfUser.GetRange(training + test, validation);

        create_data(user_id, validation_list_item, validation_list_time, ref validation_data);
        create_data(user_id, test_list_item, test_list_time, ref test_data);
        create_data(user_id, training_list_item, training_list_time, ref training_data);
    }

    Console.Write(validation_data.Statistics());
    Console.Write(test_data.Statistics());
    Console.Write(training_data.Statistics());
    Console.WriteLine("finished creating datasets:");
    Console.WriteLine(DateTime.Now);
}
//
/// <summary>
/// Randomly assigns every entry of <paramref name="all_data"/> to either the test or the
/// training partition, one independent draw per entry. The two partitions are passed to
/// <c>create_data</c>; statistics and a timestamp are then written to the console.
/// </summary>
/// <param name="all_data">Source data; read through Users, Items and Times with a shared index.</param>
/// <param name="test_data">Forwarded by reference to <c>create_data</c> with the test entries.</param>
/// <param name="training_data">Forwarded by reference to <c>create_data</c> with the training entries.</param>
private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data) {
    List<int> test_list_item = new List<int>();
    List<int> training_list_item = new List<int>();

    List<DateTime> test_list_time = new List<DateTime>();
    List<DateTime> training_list_time = new List<DateTime>();

    List<int> test_list_user = new List<int>();
    List<int> training_list_user = new List<int>();

    System.Random gen = new System.Random();
    for (int i = 0; i < all_data.Users.Count; i++) {
        // gen.Next(100) yields 0..99, so "<= 30" sends an entry to the test
        // partition with probability 31/100.
        int rnd = gen.Next(100);
        if (rnd <= 30) {
            test_list_item.Add(all_data.Items[i]);
            test_list_time.Add(all_data.Times[i]);
            test_list_user.Add(all_data.Users[i]);
        } else {
            training_list_item.Add(all_data.Items[i]);
            training_list_time.Add(all_data.Times[i]);
            training_list_user.Add(all_data.Users[i]);
        }
    }

    create_data(test_list_user, test_list_item, test_list_time, ref test_data);
    create_data(training_list_user, training_list_item, training_list_time, ref training_data);

    Console.Write(test_data.Statistics());
    Console.Write(training_data.Statistics());
    Console.WriteLine("finished creating datasets:");
    Console.WriteLine(DateTime.Now);
}