Beispiel #1
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the repeated_events argument is set to allow repeated events.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list); default is RepeatedEvents.No</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        static public ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList <int> test_users             = null,
            IList <int> candidate_items        = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            RepeatedEvents repeated_events     = RepeatedEvents.No,
            int n = -1)
        {
            // default: evaluate every user that occurs in the test data
            if (test_users == null)
            {
                test_users = test.AllUsers;
            }
            candidate_items = Candidates(candidate_items, candidate_item_mode, test, training);

            var result = new ItemRecommendationEvaluationResults();

            // make sure that the user matrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix     = test.UserMatrix;

            // number of users that contributed to the sums in result; only touched inside lock (result)
            int num_users = 0;

            Parallel.ForEach(test_users, user_id => {
                try
                {
                    // relevant items for this user: test items that are also candidates
                    var correct_items = new HashSet <int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);
                    if (correct_items.Count == 0)
                    {
                        return; // skip this user: no relevant candidate item
                    }

                    // unless repeated events are allowed, items from the user's training data are ignored
                    var ignore_items_for_this_user = new HashSet <int>(
                        repeated_events == RepeatedEvents.Yes || training_user_matrix[user_id] == null ? new int[0] : training_user_matrix[user_id]
                        );

                    ignore_items_for_this_user.IntersectWith(candidate_items);
                    int num_candidates_for_this_user = candidate_items.Count - ignore_items_for_this_user.Count;
                    if (correct_items.Count == num_candidates_for_this_user)
                    {
                        return; // skip this user: the relevant items are as many as the remaining candidates
                    }

                    var prediction      = recommender.Recommend(user_id, candidate_items: candidate_items, n: n, ignore_items: ignore_items_for_this_user);
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    // candidates that are missing from the returned list are passed to the AUC computation
                    int num_dropped_items = num_candidates_for_this_user - prediction.Count;
                    double auc            = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map            = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg           = NDCG.Compute(prediction_list, correct_items);
                    double rr             = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions         = new int[] { 5, 10 };
                    var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // Snapshot of the counter taken under the lock, so that the progress
                    // output below never reads num_users while another thread is updating it.
                    int users_so_far;

                    // thread-safe incrementing
                    lock (result)
                    {
                        users_so_far = ++num_users;
                        result["AUC"]       += (float)auc;
                        result["MAP"]       += (float)map;
                        result["NDCG"]      += (float)ndcg;
                        result["MRR"]       += (float)rr;
                        result["prec@5"]    += (float)prec[5];
                        result["prec@10"]   += (float)prec[10];
                        result["recall@5"]  += (float)recall[5];
                        result["recall@10"] += (float)recall[10];
                    }

                    // progress indicator on stderr: one dot per 1000 users, a line break every 60000
                    if (users_so_far % 1000 == 0)
                    {
                        Console.Error.Write(".");
                    }
                    if (users_so_far % 60000 == 0)
                    {
                        Console.Error.WriteLine();
                    }
                }
                catch (Exception e)
                {
                    // log, then rethrow so that the failure is not silently swallowed
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the accumulated sums into per-user averages
            foreach (string measure in Measures)
            {
                result[measure] /= num_users;
            }
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return(result);
        }
        /// <summary>Evaluate all recommenders in m_recommenders and three combined rankings on the same test users</summary>
        /// <remarks>
        /// For every user in test_users[0], each recommender produces a ranked list; the item IDs of each list
        /// are translated into the ID space of item_mapping[0], so that all lists can be compared against the
        /// test items of test_data[0].
        ///
        /// The returned list contains one result per recommender (in the order of m_recommenders), followed by
        /// three additional results:
        /// the list of the recommender selected by m_ensemble.best_alg for the user,
        /// the list returned by m_ensemble.Ensenble, and
        /// the list returned by m_ensemble.EnsenblePeso with the user's entry in m_ensemble.ga_weights.
        ///
        /// Null entries in test_users are replaced in place by the users of the corresponding test data.
        /// </remarks>
        /// <param name="test_data">test cases, one entry per recommender</param>
        /// <param name="training_data">training data, one entry per recommender</param>
        /// <param name="test_users">test users per recommender; only the users of the first entry are iterated</param>
        /// <param name="user_mapping">user ID mappings, one entry per recommender</param>
        /// <param name="item_mapping">item ID mappings, one entry per recommender</param>
        /// <param name="n">length of the item lists requested from the recommenders (passed on to Recommend)</param>
        /// <returns>one evaluation result per recommender, followed by the three combined results</returns>
        public List <ItemRecommendationEvaluationResults> Evaluate(List <IPosOnlyFeedback> test_data,
                                                                   List <IPosOnlyFeedback> training_data, List <IList <int> > test_users, List <IMapping> user_mapping,
                                                                   List <IMapping> item_mapping,

                                                                   int n = -1)
        {
            int num_recommenders = m_recommenders.Count;
            int num_result_lists = num_recommenders + 3; // + best single recommender, + ensemble, + GA-weighted ensemble

            var candidate_items      = new List <IList <int> >();
            var training_user_matrix = new List <IBooleanMatrix>();
            var test_user_matrix     = new List <IBooleanMatrix>();

            for (int i = 0; i < num_recommenders; i++)
            {
                // candidates for recommender i: every item seen in its test or training data
                candidate_items.Add(new List <int>(test_data[i].AllItems.Union(training_data[i].AllItems)));

                if (test_users[i] == null)
                {
                    test_users[i] = test_data[i].AllUsers;
                }

                training_user_matrix.Add(training_data[i].UserMatrix);
                test_user_matrix.Add(test_data[i].UserMatrix);
            }

            int num_users = 0;
            var result    = new List <ItemRecommendationEvaluationResults>();

            for (int i = 0; i < num_result_lists; i++)
            {
                result.Add(new ItemRecommendationEvaluationResults());
            }

            foreach (int user_id in test_users[0])
            {
                // user_id is an internal ID of mapping 0; the original ID is the key shared by all mappings
                string original = user_mapping[0].ToOriginalID(user_id);

                var list_of_predictions          = new List <IList <Tuple <int, float> > >();
                var num_candidates_for_this_user = new List <int>();

                // relevant items are taken from the first data set and expressed in mapping 0
                var correct_items = new HashSet <int>(test_user_matrix[0][user_id]);
                correct_items.IntersectWith(candidate_items[0]);

                for (int i = 0; i < num_recommenders; i++)
                {
                    int internalId = user_mapping[i].ToInternalID(original);

                    // items from the user's training data are excluded from the ranking
                    var ignore_items = new HashSet <int>(training_user_matrix[i][internalId]);
                    ignore_items.IntersectWith(candidate_items[i]);
                    num_candidates_for_this_user.Add(candidate_items[i].Count - ignore_items.Count);

                    // Recommend with the user's ID in recommender i's own mapping -- the same ID
                    // that was used above to look up the training items.
                    var recommendation_list = m_recommenders[i].Recommend(internalId, candidate_items: candidate_items[i], n: n, ignore_items: ignore_items);

                    // translate the item IDs into mapping 0 so that all lists share one ID space
                    for (int j = 0; j < recommendation_list.Count; j++)
                    {
                        string item_original_id = item_mapping[i].ToOriginalID(recommendation_list[j].Item1);
                        int    item_id_zero     = item_mapping[0].ToInternalID(item_original_id);

                        recommendation_list[j] = new Tuple <int, float>(item_id_zero, recommendation_list[j].Item2);
                    }

                    list_of_predictions.Add(recommendation_list);
                }

                // index of the recommender selected for this user
                int best = m_ensemble.best_alg[original];

                for (int i = 0; i < num_result_lists; i++)
                {
                    IList <int> prediction_list  = null;
                    int         prediction_count = 0;

                    if (i < num_recommenders) // a single recommender
                    {
                        var prediction = list_of_predictions[i];
                        prediction_list  = (from t in prediction select t.Item1).ToArray();
                        prediction_count = prediction.Count;
                    }
                    else if (i == num_recommenders) // best single recommender for this user
                    {
                        var prediction = list_of_predictions[best];
                        prediction_list  = (from t in prediction select t.Item1).ToArray();
                        prediction_count = prediction.Count;
                    }
                    else if (i == num_recommenders + 1) // ensemble
                    {
                        var prediction_ensemble = m_ensemble.Ensenble(list_of_predictions);

                        prediction_list  = (from t in prediction_ensemble select t.Key).ToArray();
                        prediction_count = prediction_ensemble.Count;
                    }
                    else // GA-weighted ensemble
                    {
                        // EnsenblePeso takes only the weights; the lists and the relevant items
                        // are handed over through these members of m_ensemble
                        m_ensemble.list_prediction_probes = list_of_predictions;
                        m_ensemble.correct_items_global   = correct_items;

                        var prediction_ensemble = m_ensemble.EnsenblePeso(m_ensemble.ga_weights[original].ToArray());

                        prediction_list  = (from t in prediction_ensemble select t.Key).ToArray();
                        prediction_count = prediction_ensemble.Count;
                    }

                    // all lists are scored in the ID space of mapping 0, hence its candidate count
                    int    num_dropped_items = num_candidates_for_this_user[0] - prediction_count;
                    double auc       = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map       = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg      = NDCG.Compute(prediction_list, correct_items);
                    double rr        = ReciprocalRank.Compute(prediction_list, correct_items);
                    var    positions = new int[] { 5, 10 };
                    var    prec      = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var    recall    = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    result[i]["AUC"]       += (float)auc;
                    result[i]["MAP"]       += (float)map;
                    result[i]["NDCG"]      += (float)ndcg;
                    result[i]["MRR"]       += (float)rr;
                    result[i]["prec@5"]    += (float)prec[5];
                    result[i]["prec@10"]   += (float)prec[10];
                    result[i]["recall@5"]  += (float)recall[5];
                    result[i]["recall@10"] += (float)recall[10];
                }

                // count each user once, independent of the number of result lists
                num_users++;

                // progress indicator on stderr
                if (num_users % 1000 == 0)
                {
                    Console.Error.Write(".");
                }
                if (num_users % 60000 == 0)
                {
                    Console.Error.WriteLine();
                }
            }

            // Number of candidate items in the ID space the measures were computed in (mapping 0).
            // candidate_items.Count would be the number of recommenders, not the number of items.
            int num_items = candidate_items.Count > 0 ? candidate_items[0].Count : 0;

            for (int i = 0; i < num_result_lists; i++)
            {
                // turn the accumulated sums into per-user averages
                foreach (string measure in Measures)
                {
                    result[i][measure] /= num_users;
                }
                result[i]["num_users"] = num_users;
                result[i]["num_lists"] = num_users;
                result[i]["num_items"] = num_items;
            }

            return(result);
        }
//		/// <summary>
//		/// Gets string for subselecting all id's used from database
//		/// </summary>
//		/// <returns>The all identifiers string for database.</returns>
//		static private string getAllIdsStringForDatabase(IList<int> allItems){
//
//			string all_ids = "(";
//			bool first = true;
//			foreach (int id in allItems) {
//				if (first) {
//					all_ids += id.ToString ();
//					first = false;
//				} else
//					all_ids += "," + id.ToString ();
//			}
//			all_ids += ")";
//			return all_ids;
//		}
//
////		static public void getWeatherVectorLocation(IList<int> items, string connection_string, ref Dictionary<int,IList<double>> venueWeatherVectors){
////			DBConnect conn = new DBConnect (connection_string);
////			List<string>[] res;
////			res = conn.Select ("select * " +
////			" from weather_avgs_per_venue where id_int in "+getAllIdsStringForDatabase(items), 9);
////			List<string> all_ids = res [0];
////			List<string> temperature = res [1];
////			List<string> precip_intensity = res [2];
////			List<string> wind_speed = res [3];
////			List<string> humidity = res [4];
////			List<string> cloud_cover = res [5];
////			List<string> pressure = res [6];
////			List<string> visibility = res [7];
////			List<string> moonphase = res [8];
////			int i = 0;
////			foreach(string id in all_ids){
////				venueWeatherVectors.Add(int.Parse (id),new List<double> { double.Parse(temperature [i]), double.Parse(precip_intensity [i]), double.Parse(wind_speed [i]), double.Parse(humidity [i]),
////					double.Parse(cloud_cover [i])});
////				i++;
////			}
////		}


        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// except the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
//		static public ItemRecommendationEvaluationResults Evaluate(
//			this IRecommender recommender,
//			ITimedRatings test,
//			ITimedRatings training,
//			string connection_string = "",
//			int n = -1,double alpha = 0.1)
//		{
//
//			var result = new ItemRecommendationEvaluationResults();
//			var candidates = test.AllItems.Intersect(training.AllItems).ToList();
//			int num_users = 0;
//			ThreadPool.SetMinThreads(test.AllUsers.Count, test.AllUsers.Count);
//			Dictionary<int,IList<int>> user_items = test.getItemsUserDict ();
//			ParallelOptions po = new ParallelOptions{
//				MaxDegreeOfParallelism = Environment.ProcessorCount
//			};
//
//			//foreach(int user_id in test.AllUsers){
//			Parallel.ForEach (test.AllUsers, po, user_id => {
//				try {
//					n = user_items [user_id].Count;
//					IList<Tuple<int,float>> prediction;
//					prediction = recommender.Recommend (user_id, candidate_items: candidates, n: n);
//					var prediction_list = (from t in prediction select t.Item1).ToArray ();
//					int num_candidates_for_this_user = candidates.Count ();
//					int num_dropped_items = num_candidates_for_this_user - prediction.Count;
//					var correct_items = user_items [user_id].Intersect (candidates).ToList ();
//					if (correct_items.Count () == 0)
//						return;
//
//					double auc = AUC.Compute (prediction_list, correct_items, num_dropped_items);
//					double map = PrecisionAndRecall.AP (prediction_list, correct_items);
//					double ndcg = NDCG.Compute (prediction_list, correct_items);
//					double rr = ReciprocalRank.Compute (prediction_list, correct_items);
//					var positions = new int[] { 5, 10 };
//					var prec = PrecisionAndRecall.PrecisionAt (prediction_list, correct_items, positions);
//					var recall = PrecisionAndRecall.RecallAt (prediction_list, correct_items, positions);
//
//					// thread-safe incrementing
//					lock (result) {
//						num_users++;
//						result ["AUC"] += (float)auc;
//						result ["MAP"] += (float)map;
//						result ["NDCG"] += (float)ndcg;
//						result ["MRR"] += (float)rr;
//						result ["prec@5"] += (float)prec [5];
//						result ["prec@10"] += (float)prec [10];
//						result ["recall@5"] += (float)recall [5];
//						result ["recall@10"] += (float)recall [10];
//					}
//
//					if (num_users % 1000 == 0)
//						Console.Error.Write (".");
//					if (num_users % 60000 == 0)
//						Console.Error.WriteLine ();
//				} catch (Exception e) {
//					Console.Error.WriteLine ("===> ERROR: " + e.Message + e.StackTrace);
//					throw;
//				}
//			});
//
//			foreach (string measure in Measures)
//				result[measure] /= num_users;
//			result["num_users"] = num_users;
//			result["num_lists"] = num_users;
//			result["num_items"] = candidates.Count();
//
//			return result;
//		}


        /// <summary>Per-event ranking evaluation on timed data; prints per-user and averaged results</summary>
        /// <remarks>
        /// Every test event (user, item, time) whose item is a candidate (an item occurring in both test and
        /// training data) and is not in the user's training items is evaluated on its own: a top-20 list is
        /// requested for the user and scored against that single item.
        /// The measures are averaged per user over the user's evaluated events, and then averaged over all
        /// test users. Both levels are written to the console.
        /// </remarks>
        /// <param name="recommender">item recommender; must implement ITimeAwareRatingPredictor if time_aware is true</param>
        /// <param name="test">test events</param>
        /// <param name="training">training events</param>
        /// <param name="dataset">label that is printed in front of the averaged results</param>
        /// <param name="time_aware">if true, call RecommendTime with the time of the event, otherwise call Recommend</param>
        /// <param name="n">currently not used; the length of the requested lists is fixed to 20</param>
        /// <param name="alpha">currently not used</param>
        /// <returns>the averaged prec@5</returns>
        static public double EvaluateTime(
            this IRecommender recommender,
            ITimedRatings test,
            ITimedRatings training,
            string dataset,
            bool time_aware,
            int n = -1, double alpha = 0.1)
        {
            // length of the list requested from the recommender for every event
            const int list_length = 20;

            // per-user sums of the measures, and the number of events that went into each sum
            var userRecommendationResults = new Dictionary <int, ItemRecommendationEvaluationResults>();
            var userEventCounts           = new Dictionary <int, int>();

            foreach (int user in test.AllUsers)
            {
                userRecommendationResults.Add(user, new ItemRecommendationEvaluationResults());
                userEventCounts.Add(user, 0);
            }

            var candidates    = test.AllItems.Intersect(training.AllItems).ToList();
            var candidate_set = new HashSet <int>(candidates); // built once for the per-event membership test
            ParallelOptions po = new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            };
            Dictionary <int, IList <int> > trainingUserItems = training.getItemsUserDict();

            // The upper bound of Parallel.For is exclusive, so Count (not Count - 1) covers the last event.
            Parallel.For(0, test.Users.Count, po, index => {
                try{
                    DateTime time = test.Times[index];

                    int user = test.Users[index];
                    int item = test.Items[index];

                    // skip events whose item is already in the user's training data
                    if (trainingUserItems[user].Contains(item))
                    {
                        return;
                    }
                    // skip events whose item is not a candidate
                    if (!candidate_set.Contains(item))
                    {
                        return;
                    }
                    IList <int> correct_items = new List <int> { item };

                    IList <Tuple <int, float> > prediction;
                    if (time_aware)
                    {
                        prediction = ((ITimeAwareRatingPredictor)recommender).RecommendTime(user, time, candidate_items: candidates, n: list_length);
                    }
                    else
                    {
                        prediction = recommender.Recommend(user, candidate_items: candidates, n: list_length);
                    }
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    double auc    = AUC.Compute(prediction_list, correct_items, 0);
                    double map    = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg   = NDCG.Compute(prediction_list, correct_items);
                    double rr     = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions = new int[] { 5, 10 };
                    var prec      = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall    = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // accumulate plain sums here; the division by the event count happens after the loop
                    lock (userRecommendationResults){
                        ItemRecommendationEvaluationResults res = userRecommendationResults[user];
                        res["AUC"]       += (float)auc;
                        res["MAP"]       += (float)map;
                        res["NDCG"]      += (float)ndcg;
                        res["MRR"]       += (float)rr;
                        res["prec@5"]    += (float)prec [5];
                        res["prec@10"]   += (float)prec [10];
                        res["recall@5"]  += (float)recall [5];
                        res["recall@10"] += (float)recall [10];
                        userEventCounts[user]++;
                    }
                } catch (Exception e) {
                    // log, then rethrow so that the failure is not silently swallowed
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            ItemRecommendationEvaluationResults avg_res = new ItemRecommendationEvaluationResults();
            int num_users = 0;

            Console.WriteLine("Detailed user results:");
            // NOTE(review): this per-user output loop was reconstructed from a damaged source line;
            // confirm the exact "User:" formatting against the project history.
            foreach (int user in userRecommendationResults.Keys)
            {
                ItemRecommendationEvaluationResults user_res = userRecommendationResults[user];
                int num_events = userEventCounts[user];

                Console.WriteLine("User: " + user);
                foreach (var key in user_res.Keys)
                {
                    // The mean is computed into a local: writing it back into user_res would
                    // modify the dictionary while its keys are being enumerated.
                    var user_mean = num_events > 0 ? user_res[key] / num_events : 0;
                    avg_res[key] += user_mean;
                    Console.WriteLine("{0}={1}", key, user_mean);
                }
                // every test user counts, including users without any evaluated event
                num_users++;
            }
            foreach (string measure in Measures)
            {
                avg_res[measure] /= num_users;
            }
            Console.WriteLine(dataset + " Avg results:");
            foreach (var key in avg_res.Keys)
            {
                Console.WriteLine("{0}={1}", key, avg_res[key]);
            }
            return(avg_res["prec@5"]);
        }