예제 #1
0
    /// <summary>
    /// Prunes sparse entries from <paramref name="all_data"/> in two passes:
    /// first every item whose time list has fewer than 20 entries is removed,
    /// then every user whose item list has fewer than 20 entries is removed.
    /// Finally the data set statistics and a completion message are written to the console.
    /// </summary>
    /// <param name="all_data">Data set that is pruned in place.</param>
    public static void removeUserThreshold(ref ITimedRatings all_data)
    {
        // Pass 1 (items): decide first, remove afterwards, so AllItems is never
        // modified while it is being enumerated.
        var sparseItems = new List <int>();
        Dictionary <int, IList <DateTime> > timesPerItem = all_data.getTimesItemDict();

        foreach (int itemId in all_data.AllItems)
        {
            if (timesPerItem[itemId].Count >= 20)
            {
                continue;
            }
            sparseItems.Add(itemId);
        }
        for (int i = 0; i < sparseItems.Count; i++)
        {
            all_data.RemoveItem(sparseItems[i]);
        }

        // Pass 2 (users): the per-user dictionary is fetched only after the item
        // removal above has completed.
        var sparseUsers = new List <int>();
        Dictionary <int, IList <int> > itemsPerUser = all_data.getItemsUserDict();

        foreach (int userId in all_data.AllUsers)
        {
            if (itemsPerUser[userId].Count >= 20)
            {
                continue;
            }
            sparseUsers.Add(userId);
        }
        for (int i = 0; i < sparseUsers.Count; i++)
        {
            all_data.RemoveUser(sparseUsers[i]);
        }

        Console.Write(all_data.Statistics());
        Console.Write("Finished removing thresholds");
    }
예제 #2
0
//	private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings validation_data, ref ITimedRatings test_data, ref ITimedRatings training_data){
//
//
//		Console.WriteLine (all_data.Statistics ());
//
//		List<int> validation_list_item = new List<int>();
//		List<int> test_list_item = new List<int>();
//		List<int> training_list_item = new List<int>();
//
//		List<DateTime> validation_list_time = new List<DateTime>();
//		List<DateTime> test_list_time = new List<DateTime>();
//		List<DateTime> training_list_time = new List<DateTime>();
//
//		List<int> validation_list_user = new List<int>();
//		List<int> test_list_user = new List<int>();
//		List<int> training_list_user = new List<int>();
//
//		System.Random gen = new System.Random();
//		for (int i = 0; i < all_data.Users.Count; i++) {
//			int rnd = gen.Next(100);
//			if(rnd <= 10){
//				validation_list_item.Add(all_data.Items[i]);
//				validation_list_time.Add(all_data.Times[i]);
//				validation_list_user.Add(all_data.Users[i]);
//			}else if(rnd <=30){
//				test_list_item.Add(all_data.Items[i]);
//				test_list_time.Add(all_data.Times[i]);
//				test_list_user.Add(all_data.Users[i]);
//			}else{
//				training_list_item.Add(all_data.Items[i]);
//				training_list_time.Add(all_data.Times[i]);
//				training_list_user.Add(all_data.Users[i]);
//			}
//		}
//		create_data(validation_list_user,  validation_list_item, validation_list_time, ref validation_data);
//		create_data(test_list_user,  test_list_item, test_list_time, ref test_data);
//		create_data(training_list_user,  training_list_item, training_list_time, ref training_data);
//
//
//
//		Console.Write(validation_data.Statistics ());
//		Console.Write(test_data.Statistics ());
//		Console.Write(training_data.Statistics());
//		Console.WriteLine ("finished creating datasets:");
//		Console.WriteLine (DateTime.Now);
//	}
//
    /// <summary>
    /// Splits each user's events, in the order they are stored, into three
    /// disjoint consecutive slices: the first 70 % (rounded down) go to the
    /// training set, the next 20 % (rounded down) to the test set and the
    /// remaining events to the validation set. Each slice is handed to
    /// <c>create_data</c>; afterwards the statistics of the three data sets and
    /// a timestamp are written to the console.
    /// </summary>
    /// <param name="all_data">Source data set; it is only read.</param>
    /// <param name="test_data">Receives the test slice of every user.</param>
    /// <param name="training_data">Receives the training slice of every user.</param>
    /// <param name="validation_data">Receives the validation slice of every user.</param>
    private static void readAndSplitData(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data, ref ITimedRatings validation_data)
    {
        Dictionary <int, IList <DateTime> > user_times = all_data.getTimesUserDict();
        Dictionary <int, IList <int> >      user_items = all_data.getItemsUserDict();

        foreach (int user_id in all_data.AllUsers)
        {
            // The dictionaries expose IList<T>; reuse the instance when it already is a
            // List<T>, otherwise copy it instead of failing with an InvalidCastException.
            List <DateTime> timesOfUser = user_times[user_id] as List <DateTime> ?? new List <DateTime>(user_times[user_id]);
            List <int>      itemsOfUser = user_items[user_id] as List <int> ?? new List <int>(user_items[user_id]);

            int amountCheckIns = timesOfUser.Count;
            int training       = (int)(amountCheckIns * 0.7);
            int test           = (int)(amountCheckIns * 0.2);
            // Whatever is left after the two rounded-down slices (roughly 10 %).
            int validation     = amountCheckIns - (training + test);

            // The test slice is limited to `test` elements so that it stops where the
            // validation slice starts; taking everything after the training slice
            // would make the test set contain the whole validation set.
            List <int> training_list_item   = itemsOfUser.GetRange(0, training);
            List <int> test_list_item       = itemsOfUser.GetRange(training, test);
            List <int> validation_list_item = itemsOfUser.GetRange(training + test, validation);

            List <DateTime> training_list_time   = timesOfUser.GetRange(0, training);
            List <DateTime> test_list_time       = timesOfUser.GetRange(training, test);
            List <DateTime> validation_list_time = timesOfUser.GetRange(training + test, validation);

            create_data(user_id, validation_list_item, validation_list_time, ref validation_data);
            create_data(user_id, test_list_item, test_list_time, ref test_data);
            create_data(user_id, training_list_item, training_list_time, ref training_data);
        }

        Console.Write(validation_data.Statistics());
        Console.Write(test_data.Statistics());
        Console.Write(training_data.Statistics());
        Console.WriteLine("finished creating datasets:");
        Console.WriteLine(DateTime.Now);
    }
예제 #3
0
//
    /// <summary>
    /// Randomly distributes the events of <paramref name="all_data"/> over a test
    /// and a training set. For every index of <c>all_data.Users</c> a number is
    /// drawn with <c>Random.Next(100)</c>; a draw of 30 or less sends the
    /// (user, item, time) triple at that index to the test lists, any other draw
    /// sends it to the training lists. Both groups are then passed to
    /// <c>create_data</c>, and the statistics of both data sets plus a timestamp
    /// are written to the console.
    /// </summary>
    /// <param name="all_data">Source data set; it is only read.</param>
    /// <param name="test_data">Receives the randomly selected test events.</param>
    /// <param name="training_data">Receives all remaining events.</param>
    private static void readAndSplitDataRandomly(ITimedRatings all_data, ref ITimedRatings test_data, ref ITimedRatings training_data)
    {
        // NOTE(review): neither dictionary is read anywhere below; the two calls are
        // kept exactly as they were in case the getters do work that callers rely on.
        Dictionary <int, IList <DateTime> > user_times = all_data.getTimesUserDict();
        Dictionary <int, IList <int> >      user_items = all_data.getItemsUserDict();

        var testUsers = new List <int>();
        var testItems = new List <int>();
        var testTimes = new List <DateTime>();

        var trainUsers = new List <int>();
        var trainItems = new List <int>();
        var trainTimes = new List <DateTime>();

        var rng = new System.Random();
        for (int idx = 0; idx < all_data.Users.Count; idx++)
        {
            // Random.Next(100) yields 0..99, so "<= 30" matches 31 of the 100 possible draws.
            bool goesToTest = rng.Next(100) <= 30;

            List <int>      itemSink = goesToTest ? testItems : trainItems;
            List <DateTime> timeSink = goesToTest ? testTimes : trainTimes;
            List <int>      userSink = goesToTest ? testUsers : trainUsers;

            itemSink.Add(all_data.Items[idx]);
            timeSink.Add(all_data.Times[idx]);
            userSink.Add(all_data.Users[idx]);
        }

        create_data(testUsers, testItems, testTimes, ref test_data);
        create_data(trainUsers, trainItems, trainTimes, ref training_data);

        Console.Write(test_data.Statistics());
        Console.Write(training_data.Statistics());
        Console.WriteLine("finished creating datasets:");
        Console.WriteLine(DateTime.Now);
    }
예제 #4
0
//		/// <summary>
//		/// Gets string for subselecting all id's used from database
//		/// </summary>
//		/// <returns>The all identifiers string for database.</returns>
//		static private string getAllIdsStringForDatabase(IList<int> allItems){
//
//			string all_ids = "(";
//			bool first = true;
//			foreach (int id in allItems) {
//				if (first) {
//					all_ids += id.ToString ();
//					first = false;
//				} else
//					all_ids += "," + id.ToString ();
//			}
//			all_ids += ")";
//			return all_ids;
//		}
//
////		static public void getWeatherVectorLocation(IList<int> items, string connection_string, ref Dictionary<int,IList<double>> venueWeatherVectors){
////			DBConnect conn = new DBConnect (connection_string);
////			List<string>[] res;
////			res = conn.Select ("select * " +
////			" from weather_avgs_per_venue where id_int in "+getAllIdsStringForDatabase(items), 9);
////			List<string> all_ids = res [0];
////			List<string> temperature = res [1];
////			List<string> precip_intensity = res [2];
////			List<string> wind_speed = res [3];
////			List<string> humidity = res [4];
////			List<string> cloud_cover = res [5];
////			List<string> pressure = res [6];
////			List<string> visibility = res [7];
////			List<string> moonphase = res [8];
////			int i = 0;
////			foreach(string id in all_ids){
////				venueWeatherVectors.Add(int.Parse (id),new List<double> { double.Parse(temperature [i]), double.Parse(precip_intensity [i]), double.Parse(wind_speed [i]), double.Parse(humidity [i]),
////					double.Parse(cloud_cover [i])});
////				i++;
////			}
////		}


        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// except the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results (default is false)</returns>
//		static public ItemRecommendationEvaluationResults Evaluate(
//			this IRecommender recommender,
//			ITimedRatings test,
//			ITimedRatings training,
//			string connection_string = "",
//			int n = -1,double alpha = 0.1)
//		{
//
//			var result = new ItemRecommendationEvaluationResults();
//			var candidates = test.AllItems.Intersect(training.AllItems).ToList();
//			int num_users = 0;
//			ThreadPool.SetMinThreads(test.AllUsers.Count, test.AllUsers.Count);
//			Dictionary<int,IList<int>> user_items = test.getItemsUserDict ();
//			ParallelOptions po = new ParallelOptions{
//				MaxDegreeOfParallelism = Environment.ProcessorCount
//			};
//
//			//foreach(int user_id in test.AllUsers){
//			Parallel.ForEach (test.AllUsers, po, user_id => {
//				try {
//					n = user_items [user_id].Count;
//					IList<Tuple<int,float>> prediction;
//					prediction = recommender.Recommend (user_id, candidate_items: candidates, n: n);
//					var prediction_list = (from t in prediction select t.Item1).ToArray ();
//					int num_candidates_for_this_user = candidates.Count ();
//					int num_dropped_items = num_candidates_for_this_user - prediction.Count;
//					var correct_items = user_items [user_id].Intersect (candidates).ToList ();
//					if (correct_items.Count () == 0)
//						return;
//
//					double auc = AUC.Compute (prediction_list, correct_items, num_dropped_items);
//					double map = PrecisionAndRecall.AP (prediction_list, correct_items);
//					double ndcg = NDCG.Compute (prediction_list, correct_items);
//					double rr = ReciprocalRank.Compute (prediction_list, correct_items);
//					var positions = new int[] { 5, 10 };
//					var prec = PrecisionAndRecall.PrecisionAt (prediction_list, correct_items, positions);
//					var recall = PrecisionAndRecall.RecallAt (prediction_list, correct_items, positions);
//
//					// thread-safe incrementing
//					lock (result) {
//						num_users++;
//						result ["AUC"] += (float)auc;
//						result ["MAP"] += (float)map;
//						result ["NDCG"] += (float)ndcg;
//						result ["MRR"] += (float)rr;
//						result ["prec@5"] += (float)prec [5];
//						result ["prec@10"] += (float)prec [10];
//						result ["recall@5"] += (float)recall [5];
//						result ["recall@10"] += (float)recall [10];
//					}
//
//					if (num_users % 1000 == 0)
//						Console.Error.Write (".");
//					if (num_users % 60000 == 0)
//						Console.Error.WriteLine ();
//				} catch (Exception e) {
//					Console.Error.WriteLine ("===> ERROR: " + e.Message + e.StackTrace);
//					throw;
//				}
//			});
//
//			foreach (string measure in Measures)
//				result[measure] /= num_users;
//			result["num_users"] = num_users;
//			result["num_lists"] = num_users;
//			result["num_items"] = candidates.Count();
//
//			return result;
//		}


        /// <summary>
        /// Evaluates a recommender one test event at a time. For each processed
        /// (user, item, time) event of <paramref name="test"/> a top-20 recommendation
        /// list is requested and scored against that single item; the scores are folded
        /// into a per-user result object, printed, and the "prec@5" entry of the
        /// averaged results is returned.
        /// </summary>
        /// <param name="recommender">Recommender under evaluation; it is cast to ITimeAwareRatingPredictor when <paramref name="time_aware"/> is true.</param>
        /// <param name="test">Test events; accessed through AllUsers, AllItems, Users, Items and Times.</param>
        /// <param name="training">Training data; supplies the candidate-item filter and the per-user lists of already known items.</param>
        /// <param name="dataset">Label printed in front of the averaged results.</param>
        /// <param name="time_aware">If true, RecommendTime(user, time, ...) is called instead of Recommend(user, ...).</param>
        /// <param name="n">Not referenced in the visible body; both recommendation calls pass the literal 20 as list length.</param>
        /// <param name="alpha">Not referenced in the visible body.</param>
        /// <returns>The value of avg_res["prec@5"] after each measure in Measures was divided by the number of users in the result dictionary.</returns>
        static public double EvaluateTime(
            this IRecommender recommender,
            ITimedRatings test,
            ITimedRatings training,
            string dataset,
            bool time_aware,
            int n = -1, double alpha = 0.1)
        {
            Dictionary <int, ItemRecommendationEvaluationResults> userRecommendationResults = new Dictionary <int, ItemRecommendationEvaluationResults> ();

            // Pre-create one result object per test user so the parallel loop below only
            // reads and updates existing dictionary entries.
            foreach (int user in test.AllUsers)
            {
                userRecommendationResults.Add(user, new ItemRecommendationEvaluationResults());
            }

            // Only items that occur in both the test and the training data are candidates.
            var             candidates = test.AllItems.Intersect(training.AllItems).ToList();
            ParallelOptions po         = new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            };
            // Single flag shared by all users and all loop iterations (see the lock block below).
            bool init = true;
            Dictionary <int, IList <int> > trainingUserItems = training.getItemsUserDict();

            // NOTE(review): the upper bound of Parallel.For is exclusive, so with
            // "Count - 1" the event at the last index is never evaluated -- confirm
            // whether that is intended.
            Parallel.For(0, test.Users.Count - 1, po, index => {
                try{
                    DateTime time = test.Times[index];

                    int user = test.Users[index];
                    int item = test.Items[index];
                    // Skip events whose item is already in the user's training items.
                    // The Dictionary indexer throws KeyNotFoundException for a user that has
                    // no entry in the training dictionary; that ends up in the catch block below.
                    if (trainingUserItems[user].Contains(item))
                    {
                        return;
                    }
                    // The ground truth is this single item, and only if it is a candidate.
                    IList <int> correct_items = new List <int>();
                    correct_items.Add(item);
                    correct_items = correct_items.Intersect(candidates).ToList();
                    if (correct_items.Count() == 0)
                    {
                        return;
                    }
                    IList <Tuple <int, float> > prediction;
                    if (time_aware)
                    {
                        prediction = ((ITimeAwareRatingPredictor)recommender).RecommendTime(user, time, candidate_items: candidates, n: 20);
                    }
                    else
                    {
                        prediction = recommender.Recommend(user, candidate_items: candidates, n: 20);
                    }
                    // Keep only the item ids (Item1 of each tuple) of the prediction.
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    double auc    = AUC.Compute(prediction_list, correct_items, 0);
                    double map    = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg   = NDCG.Compute(prediction_list, correct_items);
                    double rr     = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions = new int[] { 5, 10 };
                    var prec      = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall    = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // All updates of the per-user results and of `init` happen under this lock.
                    lock (userRecommendationResults){
                        ItemRecommendationEvaluationResults res = userRecommendationResults[user];
                        res["AUC"]       += (float)auc;
                        res["MAP"]       += (float)map;
                        res["NDCG"]      += (float)ndcg;
                        res["MRR"]       += (float)rr;
                        res["prec@5"]    += (float)prec [5];
                        res["prec@10"]   += (float)prec [10];
                        res["recall@5"]  += (float)recall [5];
                        res["recall@10"] += (float)recall [10];
                        // Every update except the very first one processed (for any user)
                        // halves the sums right after adding the new values.
                        // NOTE(review): `init` is not tracked per user, so a user's first
                        // event is halved too unless it is the first event overall -- confirm intent.
                        if (!init)
                        {
                            res["AUC"]       /= 2;
                            res["MAP"]       /= 2;
                            res["NDCG"]      /= 2;
                            res["MRR"]       /= 2;
                            res["prec@5"]    /= 2;
                            res["prec@10"]   /= 2;
                            res["recall@5"]  /= 2;
                            res["recall@10"] /= 2;
                        }
                        init = false;
                        userRecommendationResults[user] = res;
                    }
                } catch (Exception e) {
                    // Log the failure to stderr, then rethrow it unchanged.
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            ItemRecommendationEvaluationResults avg_res = new ItemRecommendationEvaluationResults();
            int num_users = 0;

            Console.WriteLine("Detailed user results:");
            foreach (int user in userRecommendationResults.Keys)
            {
                // NOTE(review): the next line is damaged in this copy of the file (the
                // "******" run replaces missing source text and `key` has no visible
                // declaration). Presumably an inner loop over the user's result keys
                // started here; no visible statement adds values to avg_res -- restore
                // from the original source.
                Console.Write("User: "******"{0}={1}", key, userRecommendationResults [user] [key]);
                }
                num_users++;
            }
            // Measures is defined outside this method; each listed measure is divided by
            // the number of users in the result dictionary.
            foreach (string measure in Measures)
            {
                avg_res[measure] /= num_users;
            }
            Console.WriteLine(dataset + " Avg results:");
            foreach (var key in avg_res.Keys)
            {
                Console.WriteLine("{0}={1}", key, avg_res[key]);
            }
            return(avg_res["prec@5"]);
        }