Example #1
0
        /// <summary>
        /// Builds the ranked-item pairs for every active query, sorts them by alpha, and asks
        /// FindBestAlpha for the weight on <paramref name="qc2"/> that gives the best mean NDCG gain.
        /// When the verbose flag is set, the combined collection is also rebuilt and its NDCGs printed
        /// so the reported gain can be checked by eye.
        /// </summary>
        /// <param name="qc1">the scores for all the queries computed from the existing system</param>
        /// <param name="qc2">the score for all the queries computed from the newly added tree</param>
        /// <param name="queryIdxActive">the set of "active queries" that we use to find the optimal combination;
        /// null means that every query in qc1/qc2 is used</param>
        /// <param name="bestMeanNDCGGain">receives the mean NDCG gain reported by FindBestAlpha</param>
        /// <returns>the best alpha reported by FindBestAlpha</returns>
        /// <exception cref="Exception">the two collections differ in row or query count, or there are more
        /// active queries than queries</exception>
        public double FindStep(QueryCollection qc1, QueryCollection qc2, int[] queryIdxActive, out double bestMeanNDCGGain)
        {
            PairRankedItems.Reset();

            // A null index array stands for "every query in qc1/qc2 is active".
            bool useAllQueries = (queryIdxActive == null);
            int activeQueryCount;
            if (useAllQueries)
            {
                activeQueryCount = qc1.NQueries;
            }
            else
            {
                activeQueryCount = queryIdxActive.Length;
            }

            if (qc1.Count != qc2.Count)
            {
                throw new Exception("Input files must have same number of rows.");
            }
            if (qc1.NQueries != qc2.NQueries)
            {
                throw new Exception("Input files must have same number of queries.");
            }
            if (qc1.NQueries < activeQueryCount)
            {
                throw new Exception("Active queries must be less than all the queries.");
            }

            DCGScorer scorer = new DCGScorer();

            // The pair and document totals are only reported in the verbose output below.
            long totalPairs;
            int totalDocs;
            CountDocsNAllPairs(qc1, qc2, queryIdxActive, out totalPairs, out totalDocs);

            int rankedItemCount = 0;
            int usedQueries = 0;
            int skippedQueries = 0;
            for (int k = 0; k < activeQueryCount; ++k)
            {
                int queryIndex = useAllQueries ? k : queryIdxActive[k];

                // The array is discarded on each pass, but the objects it points to persist.
                // A null result means the query has maxDCG = 0 and is left out.
                RankedItem[] items = FillRankedItems(qc1.queries[queryIndex], qc2.queries[queryIndex], scorer, random);
                if (items == null)
                {
                    ++skippedQueries;
                    continue;
                }

                FillRanks(items);
                FillPairRankedItems(items, convex, alphaPos, maxStep, ref rankedItemCount); // This forms a linked list.
                ++usedQueries;
            }

            PairRankedItems[] pairRankedItems = PRI_ListToArray();
            if (!alphaPos)
            {
                // First value closest to zero, each following value more negative.
                Array.Sort(pairRankedItems, new SortPairRankedItemsDecreasing());
            }
            else
            {
                // First value closest to zero, each following value more positive.
                Array.Sort(pairRankedItems, new SortPairRankedItemsIncreasing());
            }

            // With the alphas sorted, find the global alpha that gives the best NDCG gain.
            double bestAlpha;
            FindBestAlpha(pairRankedItems, scorer, usedQueries, out bestAlpha, out bestMeanNDCGGain);

            if (!verbose)
            {
                return bestAlpha;
            }

            Console.WriteLine("{0} queries total, {1} skipped queries, {2} docs", usedQueries + skippedQueries, skippedQueries, totalDocs);
            Console.WriteLine("Tot. # pairs = {0}, num. pairs in computation = {1}", totalPairs, pairRankedItems.Length);
            // For the convex combination it is tempting to rescale alpha so that the first weight is one,
            // but that is not always possible: it may need to be -1.
            Console.WriteLine("Best mean NDCG Gain = {0}, best alpha = {1}", bestMeanNDCGGain, bestAlpha);

            // Sanity check: recompute the NDCGs of both inputs and of their combination.
            qc1.ComputeNDCGs();
            qc2.ComputeNDCGs();
            double firstFactor = 1.0;
            if (convex)
            {
                firstFactor = 1.0 - bestAlpha;
            }
            QueryCollection combined = QueryCollection.LinearlyCombine(firstFactor, qc1, bestAlpha, qc2);
            combined.ComputeNDCGs();
            Console.WriteLine("NON-TRUNC: First NDCG = {0:F4}/{1:F4}, second = {2:F4}/{3:F4}, combined = {4:F4}/{5:F4}",
                              qc1.NonTruncNDCG_pes, qc1.NonTruncNDCG_opt, qc2.NonTruncNDCG_pes, qc2.NonTruncNDCG_opt,
                              combined.NonTruncNDCG_pes, combined.NonTruncNDCG_opt);
            Console.WriteLine("    TRUNC: First NDCG = {0:F4}/{1:F4}, second = {2:F4}/{3:F4}, combined = {4:F4}/{5:F4}",
                              qc1.TruncNDCG_pes, qc1.TruncNDCG_opt, qc2.TruncNDCG_pes, qc2.TruncNDCG_opt,
                              combined.TruncNDCG_pes, combined.TruncNDCG_opt);

            return bestAlpha;
        }
Example #2
0
        /// <summary>
        /// Loop through every alpha.  If both labels are the same, just swap ranks (and no change to NDCG).
        /// If not, still swap ranks, and compute cumulative delta NDCG.  Keep track of that alpha that gave the
        /// best NDCG.  Alpha = 0 is treated as a special case: it may give the best result, and it may not
        /// be one of the listed alphas, since those always correspond to swapping points.  When no crossing
        /// yields a strictly positive cumulative gain, alpha = 0 is returned together with a gain of zero.
        /// </summary>
        /// <param name="pairRankedItems">Assumed sorted by alpha, with the value closest to zero first. WARNING: SIDE EFFECTS on RankedItems.</param>
        /// <param name="scorer">Not used by this method; the position discounts are read from the static DCGScorer.discounts.</param>
        /// <param name="nQueries">Number of queries the total gain is averaged over.</param>
        /// <param name="bestAlpha">Receives the chosen alpha: 0 if nothing beat alpha = 0, otherwise the midpoint between
        /// the best crossing and the next one (or the best crossing itself when it is the last entry).</param>
        /// <param name="bestMeanNDCGGain">Receives the best cumulative gain divided by <paramref name="nQueries"/>,
        /// or 0 when <paramref name="nQueries"/> is not positive.</param>
        /// <exception cref="Exception">A pair's two items are not adjacent in rank (degenerate scores).</exception>
        void FindBestAlpha(PairRankedItems[] pairRankedItems, DCGScorer scorer, int nQueries, out double bestAlpha, out double bestMeanNDCGGain)
        {
            double bestNDCGGain = 0.0;
            // -1 means "no crossing improved on alpha = 0".  Starting at 0 would make that case
            // indistinguishable from "the first crossing was the best one".
            int bestIndex = -1;
            double NDCGGain = 0.0; // This really is a gain in a gain (again)
            double[] markups = DCGScorer.discounts; // Position dependent part of NDCG

            // Rely on jittering to take care of degeneracy.
            for (int i = 0; i < pairRankedItems.Length; ++i)
            {
                PairRankedItems pairRankedItem = pairRankedItems[i];
                RankedItem x = pairRankedItem.item1;
                RankedItem y = pairRankedItem.item2;
                int rankx = x.rank;
                int ranky = y.rank;

                // Two items can only cross when they sit at neighbouring ranks.
                if (rankx != ranky + 1 && rankx != ranky - 1)
                {
                    throw new Exception("FindBestAlpha: degenerate scores encountered.");
                }

                if (x.label != y.label)
                {
                    double markupx = markups[rankx];
                    double markupy = markups[ranky];
                    NDCGGain += (x.ndcgWt - y.ndcgWt) * (markupy - markupx);
                    if (NDCGGain > bestNDCGGain)
                    {
                        bestNDCGGain = NDCGGain;
                        bestIndex = i;
                    }
                }

                // Positions swap only if in the open interval (not at the edges), otherwise could get a spurious gain
                double alpha = pairRankedItem.alpha;
                bool insideOpenInterval = convex
                    ? (alpha != 0.0 && alpha != 1.0)
                    : (alpha != 0.0 && alpha < maxStep && alpha > -maxStep);
                if (insideOpenInterval)
                {
                    x.rank = ranky;
                    y.rank = rankx;
                }
            }

            if (bestIndex < 0)
            {
                // Nothing beat alpha = 0 (this also covers an empty pairRankedItems array).
                bestAlpha = 0.0;
            }
            else if (bestIndex < pairRankedItems.Length - 1)
            {
                // Put the best alpha half way between that found (which is on the border) and the next.
                bestAlpha = 0.5 * (pairRankedItems[bestIndex].alpha + pairRankedItems[bestIndex + 1].alpha);
            }
            else
            {
                // The best crossing is the last one, so there is no next alpha to average with.
                bestAlpha = pairRankedItems[bestIndex].alpha;
            }

            // Avoid 0/0 = NaN when every query was skipped.
            bestMeanNDCGGain = (nQueries > 0) ? bestNDCGGain / (double)nQueries : 0.0;
        }
Example #3
0
        /// <summary>
        /// Builds a query from the linked list of QueryRows that starts at QueryRow.mostRecent and is
        /// followed through each row's previous pointer.  Labels and feature vectors are copied from the
        /// rows; scores are allocated but not filled in here.  The QueryRow list is reset afterwards, this
        /// query becomes the most recent query, and the max DCGs are computed.
        /// </summary>
        /// <param name="QID">Identifier stored as the QueryID of this query.</param>
        /// <param name="scoreForDegenerateQuery">Value stored on the query for use when it is degenerate.</param>
        public Query(string QID, double scoreForDegenerateQuery)
        {
            QueryID = QID;
            this.scoreForDegenerateQuery = scoreForDegenerateQuery;

            // First pass over the list: count the rows.
            length = 0;
            for (QueryRow row = QueryRow.mostRecent; row != null; row = row.previous)
            {
                ++length;
            }

            labels = new float[length];
            ftrVectors = new float[length][];
            scores = new double[length];
            scoresCp = new double[length];
            ranks = new int[length];

            // Second pass: the list runs newest-to-oldest, so fill from the back to keep the original order.
            int slot = length - 1;
            for (QueryRow row = QueryRow.mostRecent; row != null; row = row.previous)
            {
                labels[slot] = row.Label;
                ftrVectors[slot] = row.Features;
                --slot;
            }

            // The rows have been consumed; clear the QueryRow list.
            QueryRow.Reset();

            // Link this query in as the most recent one.
            previous = mostRecent;
            mostRecent = this;

            // Fill DCGs
            FillMaxDCGs(new DCGScorer());
        }
Example #4
0
        /// <summary>
        /// query1 and query2 must be the same query (but with different scores).  The urls must be in the same order.  However they need not be sorted.
        /// Builds one RankedItem per row, carrying the row's gain (DCGScorer.scoresMap entry for the label, divided by
        /// the query's max non-truncated DCG), both scores and the label, then hands the array to SortNJitter.
        /// </summary>
        /// <param name="query1">Supplies the first score of each item, the labels and the max DCG.</param>
        /// <param name="query2">Supplies the second score of each item; its labels must match those of query1.</param>
        /// <param name="scorer">Not used by this method.</param>
        /// <param name="ran">Random source passed on to SortNJitter.</param>
        /// <returns>Null if this query has zero maxDCG.  Else, a rankedItem array, sorted by the scores in query1.</returns>
        /// <exception cref="Exception">The queries differ in length, ID, or in the label of some row.</exception>
        public static RankedItem[] FillRankedItems(Query query1, Query query2, DCGScorer scorer, Random ran)
        {
            if(query1.Length != query2.Length)
                throw new Exception("Query length mismatch.");
            if(query1.QueryID != query2.QueryID)
                throw new Exception("Queries have differnt IDs.");

            // Queries with no attainable DCG are dropped by the caller.
            double maxDCG = query1.MaxNonTruncDCG;
            if(maxDCG == 0.0)
                return null;

            int length = query1.Length;
            float[] labels1 = query1.Labels;
            float[] labels2 = query2.Labels;
            double[] scores1 = query1.scores;
            double[] scores2 = query2.scores;

            RankedItem[] rankedItems = new RankedItem[length];
            for(int i = 0; i < length; ++i)
            {
                float label = labels1[i];
                if(label != labels2[i])
                    throw new Exception("FillRankedItems: label mismatch.");
                rankedItems[i] = new RankedItem((double)DCGScorer.scoresMap[(int)label] / maxDCG, scores1[i], scores2[i], label);
            }

            // rankedItems was just allocated, so it cannot be null here.
            SortNJitter(rankedItems, ran);
            return rankedItems;
        }
Example #5
0
 /// <summary>
 /// Counts how many rows carry each label and uses that histogram to set both the
 /// non-truncated and the truncated max DCG of this query.
 /// </summary>
 /// <param name="dcg">Scorer that supplies the number of labels and computes the max DCGs.</param>
 public void FillMaxDCGs(DCGScorer dcg)
 {
     // Build the label histogram once and share it between the two computations.
     int[] labelCounts = new int[dcg.NLabels];
     foreach (float label in labels)
     {
         labelCounts[(int)label]++;
     }

     maxNonTruncDCG = dcg.ComputeMaxDCG(labelCounts);
     maxTruncDCG = dcg.ComputeMaxTruncDCG(labelCounts);
 }
Example #6
0
 /// <summary>
 /// Computes the pessimistic, optimistic and mean NDCG of this query, both truncated and
 /// non-truncated.  A query whose max non-truncated DCG is zero is degenerate and gets
 /// scoreForDegenerateQuery for all six values.
 /// </summary>
 public void ComputeNDCGs()
 {
     if (maxNonTruncDCG == 0.0)
     {
         // Degenerate query: the truncated max must be zero as well.
         // Note: such queries may be skipped when NDCG is aggregated over many queries - that is handled in QueryCollection.
         Debug.Assert(maxTruncDCG == 0.0, "maxNonTruncDCG = 0 should imply maxTruncDCG = 0");
         truncNDCG_pes = truncNDCG_opt = truncNDCG_mean = scoreForDegenerateQuery;
         nonTruncNDCG_pes = nonTruncNDCG_opt = nonTruncNDCG_mean = scoreForDegenerateQuery;
         return;
     }

     DCGScorer scorer = new DCGScorer();

     double pesTrunc, pesNonTrunc;
     scorer.ComputeDCGs(true, scores, labels, out pesTrunc, out pesNonTrunc);

     double optTrunc, optNonTrunc;
     scorer.ComputeDCGs(false, scores, labels, out optTrunc, out optNonTrunc);

     double meanTrunc, meanNonTrunc;
     scorer.ComputeMeanDCGs(scores, labels, out meanTrunc, out meanNonTrunc);

     // Normalise each DCG by the matching maximum.
     nonTruncNDCG_pes = pesNonTrunc / maxNonTruncDCG;
     nonTruncNDCG_opt = optNonTrunc / maxNonTruncDCG;
     nonTruncNDCG_mean = meanNonTrunc / maxNonTruncDCG;
     truncNDCG_pes = pesTrunc / maxTruncDCG;
     truncNDCG_opt = optTrunc / maxTruncDCG;
     truncNDCG_mean = meanTrunc / maxTruncDCG;
 }
Example #7
0
        /// <summary>
        /// Generate a random query.  No need to pass labelForUnlabeled here, since the random queries are all given labels.
        /// Note that scoreForDegenerateQuery is not a parameter of this constructor and is not assigned here.
        /// Scores are allocated but not set here.  The scoresCp and ranks arrays are allocated as well, matching
        /// the other Query constructors.
        /// </summary>
        /// <param name="priors">Passed to each QueryRow constructor.</param>
        /// <param name="scale">Passed to each QueryRow constructor.</param>
        /// <param name="nScores">Passed to each QueryRow constructor.</param>
        /// <param name="nRows">Number of rows to generate; becomes the length of the query.</param>
        /// <param name="queryID">Identifier stored as the QueryID of this query.</param>
        /// <param name="rangen">Random source passed to each QueryRow constructor.</param>
        public Query(float[] priors, double scale, int nScores, int nRows, string queryID, Random rangen)
        {
            length = nRows;
            QueryID = queryID;

            ftrVectors = new float[length][];
            scores = new double[length];
            scoresCp = new double[length];
            ranks = new int[length];
            labels = new float[length];
            for (int i = 0; i < nRows; ++i)
            {
                // Each row is only needed long enough to copy out its label and features.
                QueryRow row = new QueryRow(priors, scale, nScores, rangen);
                labels[i] = row.Label;
                ftrVectors[i] = row.Features;
            }

            // Fill DCGs
            FillMaxDCGs(new DCGScorer());
        }
Example #8
0
        /// <summary>
        /// Builds a query around caller-supplied label, feature and score arrays (the arrays are stored, not copied),
        /// allocates the scoresCp and ranks arrays, runs FixUnlabeledRows, and optionally computes the max DCGs.
        /// </summary>
        /// <param name="QID">Identifier stored as the QueryID of this query.</param>
        /// <param name="labels">One label per row; its length defines the length of the query.</param>
        /// <param name="ftrVectors">Feature vectors, stored as given.</param>
        /// <param name="scores">Scores, stored as given.</param>
        /// <param name="labelForUnlabeled">Passed to FixUnlabeledRows (presumably the label given to unlabeled rows - confirm there).</param>
        /// <param name="scoreForDegenerateQuery">Value stored on the query for use when it is degenerate.</param>
        /// <param name="dcgFlag">When true, the max DCGs are computed via FillMaxDCGs.</param>
        public Query(string QID, float[] labels, float[][] ftrVectors, double[] scores, float labelForUnlabeled,
                     double scoreForDegenerateQuery, bool dcgFlag)
        {
            // Keep the caller's arrays.
            this.labels = labels;
            this.ftrVectors = ftrVectors;
            this.scores = scores;
            this.scoreForDegenerateQuery = scoreForDegenerateQuery;
            QueryID = QID;

            // Per-row working storage.
            length = labels.Length;
            ranks = new int[length];
            scoresCp = new double[length];

            FixUnlabeledRows(labelForUnlabeled);

            // The scorer is created whether or not the max DCGs are requested.
            DCGScorer dcg = new DCGScorer();
            if (dcgFlag)
            {
                FillMaxDCGs(dcg);
            }
        }
Example #9
0
 /// <summary>
 /// Stores the empty-query settings, creates the DCG scorer, and sets the truncation level.
 /// </summary>
 /// <param name="dropEmptyQueries">Stored on this instance (presumably controls whether empty queries are left out - confirm at the use site).</param>
 /// <param name="scoreForEmptyQuery">Stored on this instance (presumably the score assigned to an empty query - confirm at the use site).</param>
 /// <param name="ndcgAt">Written to the static DCGScorer.truncLevel.</param>
 public NDCG(bool dropEmptyQueries, float scoreForEmptyQuery, int ndcgAt)
 {
     this.dropEmptyQueries = dropEmptyQueries;
     this.scoreForEmptyQuery = scoreForEmptyQuery;
     dcgScorer = new DCGScorer();
     // NOTE(review): truncLevel is a static member, so this assignment is visible through the
     // DCGScorer type itself, not just through the instance created above. It is also set after
     // that instance is constructed - confirm the DCGScorer constructor does not read truncLevel.
     DCGScorer.truncLevel = ndcgAt;
 }