コード例 #1
0
ファイル: FindStep.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Program entry point. Parses the command line into a <c>CommandLineArguments</c> instance and,
        /// when parsing succeeds, either runs <c>SelfTest</c> (if the <c>selfTest</c> option is set) or
        /// loads the two score files and calls <c>FindStepLib.FindStep</c> on them.
        /// Any exception raised along the way is reported by writing its message to the console.
        /// </summary>
        /// <param name="args">Raw command-line arguments handed to the parser.</param>
        static void Main(string[] args)
        {
            try
            {
                CommandLineArguments options = new CommandLineArguments();
                bool parsedOk = CommandLine.Parser.ParseArgumentsWithUsage(args, options);
                if (!parsedOk)
                {
                    // Nothing else to do when the arguments could not be parsed.
                    return;
                }

                Random rng = new Random(options.seed);
                FindStepLib stepFinder = new FindStepLib(options.convex, rng, options.verbose);
                DCGScorer.truncLevel = options.truncLevel;

                if (!options.selfTest)
                {
                    // Both collections are built with the same labelling / degeneracy options.
                    QueryCollection firstScores = new QueryCollection(
                        options.firstScoresFile,
                        options.labelForUnlabeled,
                        options.skipDegenerateQueries,
                        options.scoreForDegenerateQuery);
                    QueryCollection secondScores = new QueryCollection(
                        options.secondScoresFile,
                        options.labelForUnlabeled,
                        options.skipDegenerateQueries,
                        options.scoreForDegenerateQuery);

                    // The first 'feature' (index 0) of each file is taken to be the score.
                    firstScores.AssignScoresFromFeature(0);
                    secondScores.AssignScoresFromFeature(0);

                    // The gain reported through the out parameter is not used here.
                    double unusedGain;
                    stepFinder.FindStep(firstScores, secondScores, null, out unusedGain);
                }
                else
                {
                    // Original author's note: set stepFinder.alphaPos = false before this call to
                    // search for the optimal negative alpha instead (the default is true).
                    SelfTest(10, 100, stepFinder);
                }

            #if DEBUG // Keep the console window open until the user presses Enter
                Console.WriteLine("...Press Enter to terminate program...");
                Console.ReadLine();
            #endif
            }
            catch (Exception failure)
            {
                Console.WriteLine(failure.Message);
            }
        }
コード例 #2
0
ファイル: FindStep.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Generate two QueryCollections containing randomly generated scores and labels (although they share
        /// all the same labels, as though one dataset tested on two models).  The scores are loosely
        /// correlated with labels.  Then, compute the best linear combination with <c>FindStep</c> and print
        /// the NDCG before and after applying it, so the claimed NDCG gain can be compared with the NDCG gain
        /// computed directly.  Finally, after the user presses RETURN, run an exhaustive search over 10001
        /// evenly spaced alphas and print every improvement found.  The relative frequencies of the labels
        /// are taken from the May 2005 training set:
        ///
        /// Perfect:    0.0204
        /// Excellent:  0.0523
        /// Good:       0.2714
        /// Fair:       0.2855
        /// Bad:        0.3704
        ///
        /// Note we use random features to make it very unlikely that there will be any degeneracy: so the claimed delta NDCG
        /// should be what's actually measured by taking the linear combination that FindStep proposes.
        /// </summary>
        /// <param name="nQueries">Number of queries passed to the <c>QueryCollection</c> constructor.</param>
        /// <param name="nDocsPerQuery">Documents-per-query value passed to the <c>QueryCollection</c> constructor.</param>
        /// <param name="fs">Step finder under test; its <c>convex</c>, <c>alphaPos</c> and <c>MaxStep</c> members are read.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The copied collection does not share labels with the first one, or a combined collection
        /// produced during the exhaustive search is degenerate (optimistic and pessimistic NDCG differ).
        /// </exception>
        static void SelfTest(int nQueries, int nDocsPerQuery, FindStepLib fs)
        {
            if (fs == null)
                throw new ArgumentNullException("fs");

            // Fixed seed so that the generated data is reproducible from run to run.
            Random rangen = new Random(0);
            // Label priors, bads first.
            float[] priors = new float[] { 0.3704F, 0.2855F, 0.2714F, 0.0523F, 0.0204F };
            double scale1 = 10.0;
            double scale2 = 20.0;
            int nScores = 1;
            QueryCollection qc1 = new QueryCollection(nQueries, priors, scale1, nScores, nDocsPerQuery, rangen);
            // Must share labels
            QueryCollection qc2 = qc1.CopyEmptyQueryCollection();
            for (int i = 0; i < qc2.queries.Length; ++i)
            {
                Query q1 = qc1.queries[i];
                Query q2 = qc2.queries[i];
                for (int j = 0; j < q1.Length; ++j)
                {
                    double label = (double) q1.Labels[j];
                    if (q2.Labels[j] != label)
                        throw new InvalidOperationException("Labels mismatch.");
                    // Score = label plus uniform noise in [-scale, scale); the second "model" is noisier.
                    q1.scores[j] = (float)(label + scale1 * (2.0 * rangen.NextDouble() - 1.0));
                    q2.scores[j] = (float)(label + scale2 * (2.0 * rangen.NextDouble() - 1.0));
                }
            }

            double bestMeanNDCGGain;
            // We will only check for positive alphas.
            double alpha = fs.FindStep(qc1, qc2, null, out bestMeanNDCGGain); // prints out the best NDCG gain
            Console.WriteLine("Optimal alpha = {0}", alpha);

            // In convex mode the combination is (1 - alpha) * qc1 + alpha * qc2; otherwise qc1 + alpha * qc2.
            double firstFactor = fs.convex ? (1.0 - alpha) : 1.0;

            qc1.ComputeNDCGs();
            double initialNDCG_pes = qc1.NonTruncNDCG_pes;
            double initialNDCG_opt = qc1.NonTruncNDCG_opt;
            Console.WriteLine("Initial nonTruncNDCG = {0}-{1}", initialNDCG_pes, initialNDCG_opt);
            QueryCollection qc = QueryCollection.LinearlyCombine(firstFactor, qc1, alpha, qc2);
            qc.ComputeNDCGs();
            double finalNDCG_pes = qc.NonTruncNDCG_pes;
            double finalNDCG_opt = qc.NonTruncNDCG_opt;
            Console.WriteLine("Final nonTruncNDCG = {0}-{1}", finalNDCG_pes, finalNDCG_opt);

            Console.WriteLine("Type RETURN for exhaustive search");
            Console.ReadLine();

            // Number of equal intervals the alpha range is divided into; both end points are evaluated.
            const int nSteps = 10000;
            double bestFound = 0.0;
            double maxAlpha = fs.convex ? 1.0 : fs.MaxStep;
            double alphaFactor = fs.alphaPos ? 1.0 : -1.0;
            for (int i = 0; i <= nSteps; ++i)
            {
                alpha = alphaFactor * (i * maxAlpha) / nSteps;
                // The weight on qc1 must follow the candidate alpha. Reusing the weight derived from the
                // FindStep result would mean the convex search is not over (1 - alpha) * qc1 + alpha * qc2.
                double candidateFirstFactor = fs.convex ? (1.0 - alpha) : 1.0;
                qc = QueryCollection.LinearlyCombine(candidateFirstFactor, qc1, alpha, qc2);
                qc.ComputeNDCGs();
                if (qc.NonTruncNDCG_opt != qc.NonTruncNDCG_pes)
                    throw new InvalidOperationException("Self test requires no degeneracy");
                double finalNDCG_mean = qc.NonTruncNDCG_mean;
                if (finalNDCG_mean > bestFound)
                {
                    Console.WriteLine("Best NDCG found so far with search: alpha = {0}, NDCG = {1}", alpha, finalNDCG_mean);
                    bestFound = finalNDCG_mean;
                }
            }
        }