/// <summary>
/// Removes sparsely observed items and users from <paramref name="all_data"/>.
/// </summary>
/// <remarks>
/// First every item with fewer than <paramref name="min_count"/> time stamps is removed, then
/// every user with fewer than <paramref name="min_count"/> items. The per-user item lists are
/// requested only after the item removal has run. The pruning is a single pass: it is not
/// repeated after the user removal. The resulting statistics are written to the console.
/// </remarks>
/// <param name="all_data">data set to prune in place</param>
/// <param name="min_count">minimum number of entries an item or user needs in order to be kept (default 20)</param>
public static void removeUserThreshold(ref ITimedRatings all_data, int min_count = 20)
{
    // Candidates are collected first and removed in a second pass, so that
    // AllItems / AllUsers are never modified while they are being enumerated.
    var sparse_items = new List<int>();
    Dictionary<int, IList<DateTime>> item_times = all_data.getTimesItemDict();
    foreach (int item in all_data.AllItems)
    {
        if (item_times[item].Count < min_count)
        {
            sparse_items.Add(item);
        }
    }
    foreach (int item in sparse_items)
    {
        all_data.RemoveItem(item);
    }

    var sparse_users = new List<int>();
    Dictionary<int, IList<int>> user_items = all_data.getItemsUserDict();
    foreach (int user in all_data.AllUsers)
    {
        if (user_items[user].Count < min_count)
        {
            sparse_users.Add(user);
        }
    }
    foreach (int user in sparse_users)
    {
        all_data.RemoveUser(user);
    }

    Console.Write(all_data.Statistics());
    Console.Write("Finished removing thresholds");
}
//		private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings validation_data, ref ITimedRatings test_data, ref ITimedRatings training_data){
//
//
//			Console.WriteLine (all_data.Statistics ());
//
//			List<int> validation_list_item = new List<int>();
//			List<int> test_list_item = new List<int>();
//			List<int> training_list_item = new List<int>();
//
//			List<DateTime> validation_list_time = new List<DateTime>();
//			List<DateTime> test_list_time = new List<DateTime>();
//			List<DateTime> training_list_time = new List<DateTime>();
//
//			List<int> validation_list_user = new List<int>();
//			List<int> test_list_user = new List<int>();
//			List<int> training_list_user = new List<int>();
//
//			System.Random gen = new System.Random();
//			for (int i = 0; i < all_data.Users.Count; i++) {
//				int rnd = gen.Next(100);
//				if(rnd <= 10){
//					validation_list_item.Add(all_data.Items[i]);
//					validation_list_time.Add(all_data.Times[i]);
//					validation_list_user.Add(all_data.Users[i]);
//				}else if(rnd <=30){
//					test_list_item.Add(all_data.Items[i]);
//					test_list_time.Add(all_data.Times[i]);
//					test_list_user.Add(all_data.Users[i]);
//				}else{
//					training_list_item.Add(all_data.Items[i]);
//					training_list_time.Add(all_data.Times[i]);
//					training_list_user.Add(all_data.Users[i]);
//				}
//			}
//			create_data(validation_list_user, validation_list_item, validation_list_time, ref validation_data);
//			create_data(test_list_user, test_list_item, test_list_time, ref test_data);
//			create_data(training_list_user, training_list_item, training_list_time, ref training_data);
//
//
//
//			Console.Write(validation_data.Statistics ());
//			Console.Write(test_data.Statistics ());
//			Console.Write(training_data.Statistics());
//			Console.WriteLine ("finished creating datasets:");
//			Console.WriteLine (DateTime.Now);
//		}
//

/// <summary>
/// Splits the entries of every user by list position into training, test and validation parts.
/// </summary>
/// <remarks>
/// For each user with c entries, with training = (int)(c * 0.7) and test = (int)(c * 0.2):
/// <list type="bullet">
/// <item><description>training receives the first <c>training</c> entries,</description></item>
/// <item><description>test receives all entries from position <c>training</c> to the end,</description></item>
/// <item><description>validation receives all entries from position <c>training + test</c> to the end.</description></item>
/// </list>
/// NOTE(review): the test part therefore also contains the validation part -- confirm that this
/// overlap is intended (a disjoint split would take only <c>test</c> entries for the test part).
/// NOTE(review): presumably the per-user lists are in chronological order; verify against
/// getTimesUserDict / getItemsUserDict.
/// The statistics of the three resulting data sets are written to the console.
/// </remarks>
/// <param name="all_data">complete data set to split</param>
/// <param name="test_data">receives the test part</param>
/// <param name="training_data">receives the training part</param>
/// <param name="validation_data">receives the validation part</param>
private static void readAndSplitData(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data, ref ITimedRatings validation_data)
{
    Dictionary<int, IList<DateTime>> user_times = all_data.getTimesUserDict();
    Dictionary<int, IList<int>> user_items = all_data.getItemsUserDict();

    foreach (int user_id in all_data.AllUsers)
    {
        // GetRange is a List<T> member, while the dictionaries are only typed as IList<T>:
        // reuse the instance when it already is a List<T>, otherwise copy it, instead of
        // failing with an InvalidCastException on a hard downcast.
        IList<DateTime> raw_times = user_times[user_id];
        IList<int> raw_items = user_items[user_id];
        List<DateTime> timesOfUser = raw_times as List<DateTime> ?? new List<DateTime>(raw_times);
        List<int> itemsOfUser = raw_items as List<int> ?? new List<int>(raw_items);

        int amountCheckIns = timesOfUser.Count;
        // No explicit validation size: the validation part is whatever remains after training + test.
        int test = (int)(amountCheckIns * 0.2);
        int training = (int)(amountCheckIns * 0.7);

        int after_training = amountCheckIns - training;
        int after_test = amountCheckIns - (training + test);

        List<int> training_list_item = itemsOfUser.GetRange(0, training);
        List<int> test_list_item = itemsOfUser.GetRange(training, after_training);
        List<int> validation_list_item = itemsOfUser.GetRange(training + test, after_test);

        List<DateTime> training_list_time = timesOfUser.GetRange(0, training);
        List<DateTime> test_list_time = timesOfUser.GetRange(training, after_training);
        List<DateTime> validation_list_time = timesOfUser.GetRange(training + test, after_test);

        create_data(user_id, validation_list_item, validation_list_time, ref validation_data);
        create_data(user_id, test_list_item, test_list_time, ref test_data);
        create_data(user_id, training_list_item, training_list_time, ref training_data);
    }

    Console.Write(validation_data.Statistics());
    Console.Write(test_data.Statistics());
    Console.Write(training_data.Statistics());
    Console.WriteLine("finished creating datasets:");
    Console.WriteLine(DateTime.Now);
}
/// <summary>
/// Randomly distributes the individual entries of <paramref name="all_data"/> over a test and a training set.
/// </summary>
/// <remarks>
/// For every entry an integer is drawn with <c>Next(100)</c>; the entry goes to the test set when
/// the draw is at most 30 (31 of the 100 possible values) and to the training set otherwise.
/// The random generator is created without an explicit seed. The statistics of both
/// resulting data sets and the current time are written to the console.
/// </remarks>
/// <param name="all_data">complete data set to split</param>
/// <param name="test_data">receives the test part</param>
/// <param name="training_data">receives the training part</param>
private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data)
{
    // The results of these two lookups are not used below; the calls are kept because this
    // block cannot tell whether the getters have side effects.
    Dictionary<int, IList<DateTime>> times_by_user = all_data.getTimesUserDict();
    Dictionary<int, IList<int>> items_by_user = all_data.getItemsUserDict();

    var test_items = new List<int>();
    var test_times = new List<DateTime>();
    var test_users = new List<int>();

    var training_items = new List<int>();
    var training_times = new List<DateTime>();
    var training_users = new List<int>();

    var rng = new System.Random();
    for (int pos = 0; pos < all_data.Users.Count; pos++)
    {
        bool goes_to_test = rng.Next(100) <= 30;

        // Pick the destination lists once, then append the entry's item, time and user.
        List<int> item_target = goes_to_test ? test_items : training_items;
        List<DateTime> time_target = goes_to_test ? test_times : training_times;
        List<int> user_target = goes_to_test ? test_users : training_users;

        item_target.Add(all_data.Items[pos]);
        time_target.Add(all_data.Times[pos]);
        user_target.Add(all_data.Users[pos]);
    }

    create_data(test_users, test_items, test_times, ref test_data);
    create_data(training_users, training_items, training_times, ref training_data);

    Console.Write(test_data.Statistics());
    Console.Write(training_data.Statistics());
    Console.WriteLine("finished creating datasets:");
    Console.WriteLine(DateTime.Now);
}
// /// <summary> // /// Gets string for subselecting all id's used from database // /// </summary> // /// <returns>The all identifiers string for database.</returns> // static private string getAllIdsStringForDatabase(IList<int> allItems){ // // string all_ids = "("; // bool first = true; // foreach (int id in allItems) { // if (first) { // all_ids += id.ToString (); // first = false; // } else // all_ids += "," + id.ToString (); // } // all_ids += ")"; // return all_ids; // } // //// static public void getWeatherVectorLocation(IList<int> items, string connection_string, ref Dictionary<int,IList<double>> venueWeatherVectors){ //// DBConnect conn = new DBConnect (connection_string); //// List<string>[] res; //// res = conn.Select ("select * " + //// " from weather_avgs_per_venue where id_int in "+getAllIdsStringForDatabase(items), 9); //// List<string> all_ids = res [0]; //// List<string> temperature = res [1]; //// List<string> precip_intensity = res [2]; //// List<string> wind_speed = res [3]; //// List<string> humidity = res [4]; //// List<string> cloud_cover = res [5]; //// List<string> pressure = res [6]; //// List<string> visibility = res [7]; //// List<string> moonphase = res [8]; //// int i = 0; //// foreach(string id in all_ids){ //// venueWeatherVectors.Add(int.Parse (id),new List<double> { double.Parse(temperature [i]), double.Parse(precip_intensity [i]), double.Parse(wind_speed [i]), double.Parse(humidity [i]), //// double.Parse(cloud_cover [i])}); //// i++; //// } //// } /// <summary>Evaluation for rankings of items</summary> /// <remarks> /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation, /// except the boolean argument repeated_events is set. /// /// The evaluation measures are listed in the Measures property. /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results /// and the number of items that were taken into account. 
/// /// Literature: /// <list type="bullet"> /// <item><description> /// C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008 /// </description></item> /// </list> /// /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible, /// which should lead to an almost linear speed-up. /// </remarks> /// <param name="recommender">item recommender</param> /// <param name="test">test cases</param> /// <param name="training">training data</param> /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param> /// <returns>a dictionary containing the evaluation results (default is false)</returns> // static public ItemRecommendationEvaluationResults Evaluate( // this IRecommender recommender, // ITimedRatings test, // ITimedRatings training, // string connection_string = "", // int n = -1,double alpha = 0.1) // { // // var result = new ItemRecommendationEvaluationResults(); // var candidates = test.AllItems.Intersect(training.AllItems).ToList(); // int num_users = 0; // ThreadPool.SetMinThreads(test.AllUsers.Count, test.AllUsers.Count); // Dictionary<int,IList<int>> user_items = test.getItemsUserDict (); // ParallelOptions po = new ParallelOptions{ // MaxDegreeOfParallelism = Environment.ProcessorCount // }; // // //foreach(int user_id in test.AllUsers){ // Parallel.ForEach (test.AllUsers, po, user_id => { // try { // n = user_items [user_id].Count; // IList<Tuple<int,float>> prediction; // prediction = recommender.Recommend (user_id, candidate_items: candidates, n: n); // var prediction_list = (from t in prediction select t.Item1).ToArray (); // int num_candidates_for_this_user = candidates.Count (); // int num_dropped_items = num_candidates_for_this_user - prediction.Count; // var correct_items = user_items [user_id].Intersect (candidates).ToList (); // if (correct_items.Count () == 
//					0)
//						return;
//
//					double auc = AUC.Compute (prediction_list, correct_items, num_dropped_items);
//					double map = PrecisionAndRecall.AP (prediction_list, correct_items);
//					double ndcg = NDCG.Compute (prediction_list, correct_items);
//					double rr = ReciprocalRank.Compute (prediction_list, correct_items);
//					var positions = new int[] { 5, 10 };
//					var prec = PrecisionAndRecall.PrecisionAt (prediction_list, correct_items, positions);
//					var recall = PrecisionAndRecall.RecallAt (prediction_list, correct_items, positions);
//
//					// thread-safe incrementing
//					lock (result) {
//						num_users++;
//						result ["AUC"] += (float)auc;
//						result ["MAP"] += (float)map;
//						result ["NDCG"] += (float)ndcg;
//						result ["MRR"] += (float)rr;
//						result ["prec@5"] += (float)prec [5];
//						result ["prec@10"] += (float)prec [10];
//						result ["recall@5"] += (float)recall [5];
//						result ["recall@10"] += (float)recall [10];
//					}
//
//					if (num_users % 1000 == 0)
//						Console.Error.Write (".");
//					if (num_users % 60000 == 0)
//						Console.Error.WriteLine ();
//				} catch (Exception e) {
//					Console.Error.WriteLine ("===> ERROR: " + e.Message + e.StackTrace);
//					throw;
//				}
//			});
//
//			foreach (string measure in Measures)
//				result[measure] /= num_users;
//			result["num_users"] = num_users;
//			result["num_lists"] = num_users;
//			result["num_items"] = candidates.Count();
//
//			return result;
//		}

/// <summary>Evaluates a recommender on every single test event and reports per-user and averaged ranking measures</summary>
/// <remarks>
/// Every test event (user, item, time) is evaluated on its own against a recommendation list of
/// length 20. An event is skipped when the user already has the item in the training data, or
/// when the item is not a candidate item (candidates are the items that occur in both the test
/// and the training set).
///
/// For each user the measures of all evaluated events are averaged. These per-user values are
/// written to the console and then averaged over all users of the test set, including users
/// for whom no event was evaluated (their values stay at the initial value of the result object).
///
/// The events are processed in parallel with at most Environment.ProcessorCount workers.
/// </remarks>
/// <param name="recommender">item recommender; it is cast to ITimeAwareRatingPredictor when <paramref name="time_aware"/> is true</param>
/// <param name="test">test cases</param>
/// <param name="training">training data</param>
/// <param name="dataset">label printed in front of the averaged results</param>
/// <param name="time_aware">if true, lists are requested with RecommendTime and the event's time, otherwise with Recommend</param>
/// <param name="n">currently not used; the list length is fixed to 20</param>
/// <param name="alpha">currently not used</param>
/// <returns>prec@5 averaged over all users of the test set</returns>
static public double EvaluateTime(
    this IRecommender recommender,
    ITimedRatings test,
    ITimedRatings training,
    string dataset,
    bool time_aware,
    int n = -1,
    double alpha = 0.1)
{
    // Measures that are accumulated per event below.
    string[] event_measures = { "AUC", "MAP", "NDCG", "MRR", "prec@5", "prec@10", "recall@5", "recall@10" };

    var userRecommendationResults = new Dictionary<int, ItemRecommendationEvaluationResults>();
    // Number of evaluated events per user, needed to turn the per-user sums into means.
    var userEventCounts = new Dictionary<int, int>();
    foreach (int user in test.AllUsers)
    {
        userRecommendationResults.Add(user, new ItemRecommendationEvaluationResults());
        userEventCounts.Add(user, 0);
    }

    var candidates = test.AllItems.Intersect(training.AllItems).ToList();
    // Set view of the candidates for the per-event membership test; it is only read inside the loop.
    var candidate_set = new HashSet<int>(candidates);

    ParallelOptions po = new ParallelOptions
    {
        MaxDegreeOfParallelism = Environment.ProcessorCount
    };
    Dictionary<int, IList<int>> trainingUserItems = training.getItemsUserDict();

    // The upper bound of Parallel.For is exclusive, so Count (not Count - 1) covers every event.
    Parallel.For(0, test.Users.Count, po, index =>
    {
        try
        {
            DateTime time = test.Times[index];
            int user = test.Users[index];
            int item = test.Items[index];

            // Skip items the user already has in the training data.
            if (trainingUserItems[user].Contains(item))
            {
                return;
            }
            // Skip items that can never be recommended.
            if (!candidate_set.Contains(item))
            {
                return;
            }
            IList<int> correct_items = new List<int> { item };

            IList<Tuple<int, float>> prediction;
            if (time_aware)
            {
                prediction = ((ITimeAwareRatingPredictor)recommender).RecommendTime(user, time, candidate_items: candidates, n: 20);
            }
            else
            {
                prediction = recommender.Recommend(user, candidate_items: candidates, n: 20);
            }
            var prediction_list = (from t in prediction select t.Item1).ToArray();

            double auc = AUC.Compute(prediction_list, correct_items, 0);
            double map = PrecisionAndRecall.AP(prediction_list, correct_items);
            double ndcg = NDCG.Compute(prediction_list, correct_items);
            double rr = ReciprocalRank.Compute(prediction_list, correct_items);
            var positions = new int[] { 5, 10 };
            var prec = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
            var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

            // Only sums and counts are updated here; the division happens once after the loop,
            // so the result does not depend on the order in which the workers finish.
            lock (userRecommendationResults)
            {
                ItemRecommendationEvaluationResults res = userRecommendationResults[user];
                res["AUC"] += (float)auc;
                res["MAP"] += (float)map;
                res["NDCG"] += (float)ndcg;
                res["MRR"] += (float)rr;
                res["prec@5"] += (float)prec[5];
                res["prec@10"] += (float)prec[10];
                res["recall@5"] += (float)recall[5];
                res["recall@10"] += (float)recall[10];
                userEventCounts[user]++;
            }
        }
        catch (Exception e)
        {
            Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
            throw;
        }
    });

    // Per-user sums -> per-user means. Users without evaluated events are left untouched.
    foreach (KeyValuePair<int, int> entry in userEventCounts)
    {
        if (entry.Value == 0)
        {
            continue;
        }
        ItemRecommendationEvaluationResults res = userRecommendationResults[entry.Key];
        foreach (string measure in event_measures)
        {
            res[measure] /= entry.Value;
        }
    }

    ItemRecommendationEvaluationResults avg_res = new ItemRecommendationEvaluationResults();
    int num_users = 0;
    Console.WriteLine("Detailed user results:");
    foreach (int user in userRecommendationResults.Keys)
    {
        // NOTE(review): the statements between the "User: " label and the "{0}={1}" output were
        // unreadable in the reviewed source. Printing the user id and adding each value to
        // avg_res is a reconstruction inferred from the averaging below -- confirm against
        // version control.
        Console.WriteLine("User: " + user);
        ItemRecommendationEvaluationResults user_res = userRecommendationResults[user];
        foreach (var key in user_res.Keys)
        {
            avg_res[key] += user_res[key];
            Console.WriteLine("{0}={1}", key, user_res[key]);
        }
        num_users++;
    }

    foreach (string measure in Measures)
    {
        avg_res[measure] /= num_users;
    }

    Console.WriteLine(dataset + " Avg results:");
    foreach (var key in avg_res.Keys)
    {
        Console.WriteLine("{0}={1}", key, avg_res[key]);
    }
    return(avg_res["prec@5"]);
}