示例#1
0
        /// <summary>
        /// Use one function rather than operators, since it's much more efficient.
        /// </summary>
        /// <param name="alpha"></param>
        /// <param name="qc1"></param>
        /// <param name="beta"></param>
        /// <param name="qc2"></param>
        /// <returns></returns>
        public static QueryCollection LinearlyCombine(double alpha, QueryCollection qc1, double beta, QueryCollection qc2)
        {
            QueryCollection qc = qc1.CopyEmptyQueryCollection();
            for (int i = 0; i < qc.nQueries; ++i)
            {
                double[] scores1 = qc1.queries[i].scores;
                double[] scores2 = qc2.queries[i].scores;
                double[] scoresCombined = qc.queries[i].scores;
                if (scores1.Length != scores2.Length)
                    throw new Exception("LinearlyCombine: size mismatch");
                for (int j = 0; j < scores1.Length; ++j)
                {
                    scoresCombined[j] = alpha * scores1[j] + beta * scores2[j];
                }
            }

            return qc;
        }
示例#2
0
        /// <summary>
        /// Generate two QueryCollections containing randomly generated scores and labels (although they share
        /// all the same labels, as though one dataset tested on two models).  The scores are loosely
        /// correlated with labels.  Then, compute the best linear combination.  Finally compare the claimed NDCG gain
        /// with the NDCG gain computed directly.  The relative frequencies of the labels are taken from the May 2005
        /// training set: 
        /// 
        /// Perfect:	0.0204
        /// Excellent: 	0.0523
        /// Good:		0.2714
        /// Fair:		0.2855
        /// Bad:		0.3704
        ///
        /// Note we use random features to make it very unlikely that there will be any degeneracy: so the claimed delta NDCG
        /// should be what's actually measured by taking the linear combination that FindStep proposes.
        /// </summary>
        /// <param name="qc1"></param>
        /// <param name="qc2"></param>
        /// <param name="nDocsPerQuery"></param>
        static void SelfTest(int nQueries, int nDocsPerQuery, FindStepLib fs)
        {
            Random rangen = new Random(0);
            float[] priors = new float[5];
            priors[0] = 0.3704F; // bads first
            priors[1] = 0.2855F;
            priors[2] = 0.2714F;
            priors[3] = 0.0523F;
            priors[4] = 0.0204F;
            double scale1 = 10.0;
            double scale2 = 20.0;
            int nScores = 1;
            QueryCollection qc1 = new QueryCollection(nQueries, priors, scale1, nScores, nDocsPerQuery, rangen);
            // Must share labels
            QueryCollection qc2 = qc1.CopyEmptyQueryCollection();
            for(int i = 0; i < qc2.queries.Length; ++i)
            {
                Query q1 = qc1.queries[i];
                Query q2 = qc2.queries[i];
                for(int j = 0; j < q1.Length; ++j)
                {
                    double label = (double) q1.Labels[j];
                    if (q2.Labels[j] != label)
                        throw new Exception("Labels mismatch.");
                    q1.scores[j] = (float)(label + scale1 * (2.0 * rangen.NextDouble() - 1.0));
                    q2.scores[j] = (float)(label + scale2 * (2.0 * rangen.NextDouble() - 1.0));
                }

            }

            double bestMeanNDCGGain;
            // We will only check for positive alphas.
            double alpha = fs.FindStep(qc1, qc2, null, out bestMeanNDCGGain); // prints out the best NDCG gain
            Console.WriteLine("Optimal alpha = {0}", alpha);

            double firstFactor = fs.convex ? (1.0 - alpha) : 1.0;

            qc1.ComputeNDCGs();
            double initialNDCG_pes = qc1.NonTruncNDCG_pes;
            double initialNDCG_opt = qc1.NonTruncNDCG_opt;
            Console.WriteLine("Initial nonTruncNDCG = {0}-{1}", initialNDCG_pes, initialNDCG_opt);
            QueryCollection qc = QueryCollection.LinearlyCombine(firstFactor, qc1, alpha, qc2);
            qc.ComputeNDCGs();
            double finalNDCG_pes = qc.NonTruncNDCG_pes;
            double finalNDCG_opt = qc.NonTruncNDCG_opt;
            Console.WriteLine("Final nonTruncNDCG = {0}-{1}", finalNDCG_pes, finalNDCG_opt);

            Console.WriteLine("Type RETURN for exhaustive search");
            Console.ReadLine();
            double bestFound = 0.0;
            double maxAlpha = fs.convex ? 1.0 : fs.MaxStep;
            double alphaFactor = fs.alphaPos ? 1.0 : -1.0;
            for(int i = 0; i < 10001; ++i)
            {
                alpha = alphaFactor * (double)(i * maxAlpha) / 10000.0;
                qc = QueryCollection.LinearlyCombine(firstFactor, qc1, alpha, qc2);
                qc.ComputeNDCGs();
                if (qc.NonTruncNDCG_opt != qc.NonTruncNDCG_pes)
                    throw new Exception("Self test requires no degeneracy");
                double finalNDCG_mean = qc.NonTruncNDCG_mean;
                if(finalNDCG_mean > bestFound)
                {
                    Console.WriteLine("Best NDCG found so far with search: alpha = {0}, NDCG = {1}", alpha, finalNDCG_mean);
                    bestFound = finalNDCG_mean;
                }
            }
        }