Beispiel #1
0
        /// <summary>
        /// Link to another pair.  The constructor sets it to whatever mostRecent held at the moment this
        /// instance was created, so following 'previous' walks back through earlier-constructed pairs.
        /// </summary>
        public PairRankedItems previous; // automatically construct linked list

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a pair from two ranked items and links it into the chain of pairs: the new instance
        /// records the current value of mostRecent in 'previous' and then becomes mostRecent itself.
        /// </summary>
        /// <param name="r1">Stored as item1.</param>
        /// <param name="r2">Stored as item2.</param>
        public PairRankedItems(RankedItem r1, RankedItem r2)
        {
            // Chain bookkeeping: 'previous' must capture mostRecent before mostRecent is overwritten.
            this.previous = mostRecent;
            mostRecent = this;

            // Payload of the pair.
            this.item2 = r2;
            this.item1 = r1;
        }
Beispiel #2
0
        /// <summary>
        /// Visits every pair of items (index i before index j) and, whenever the two items' combined-score lines
        /// cross at an admissible alpha, constructs a PairRankedItems for the pair and stores that alpha on it.
        /// Items are assumed already ranked.  See boostingNotes.docx.
        ///
        /// Two combinations are supported:
        ///   convex:     (1-alpha)*score1 + alpha*score2; a crossing is kept when alpha lies in [0,1].
        ///               Disadvantage: previously computed weights keep changing.
        ///   non-convex: score1 + alpha*score2; a crossing is kept when alpha lies in [0,maxStep] (alphaPos)
        ///               or in [-maxStep,0] (not alphaPos).  Advantage: previously computed weights stay the same.
        /// The convex version is equivalent to the non-convex one (just rescale by 1/(1-alpha)); the non-convex
        /// form is useful because the step size is easy to control and the old weights are left unchanged.
        /// In both versions alpha = 0 corresponds to the original rank order.
        /// </summary>
        /// <param name="rankedItems">Items to pair up; must not be null.</param>
        /// <param name="convex">True for the convex combination, false for the non-convex one.</param>
        /// <param name="alphaPos">Non-convex version only: true keeps crossings with alpha in [0,maxStep],
        /// false keeps crossings with alpha in [-maxStep,0].  Ignored when <paramref name="convex"/> is true.</param>
        /// <param name="maxStep">Max step to use for the non-convex version.</param>
        /// <param name="ctr">Neither read nor written by this method.</param>
        /// <returns>The last pair constructed, or null when no pair qualified.  Earlier pairs are presumably
        /// reachable through the linked list maintained by PairRankedItems - confirm against that class.</returns>
        public static PairRankedItems FillPairRankedItems(RankedItem[] rankedItems, bool convex, bool alphaPos, double maxStep, ref int ctr)
        {
            PairRankedItems lastPair = null;
            int count = rankedItems.Length;

            for (int first = 0; first < count - 1; ++first)
            {
                RankedItem a = rankedItems[first];

                for (int second = first + 1; second < count; ++second)
                {
                    RankedItem b = rankedItems[second];
                    double crossing;
                    bool keep;

                    if (convex)
                    {
                        // Solve (1-alpha)*a1 + alpha*a2 == (1-alpha)*b1 + alpha*b2 for alpha.
                        double denominator = a.score2 - a.score1 + b.score1 - b.score2;
                        if (denominator == 0)
                        {
                            continue; // the two lines never cross (or coincide)
                        }

                        // Note ranks go inversely with score
                        crossing = (b.score1 - a.score1) / denominator;
                        keep = crossing >= 0.0 && crossing <= 1.0;
                    }
                    else
                    {
                        // Solve a1 + alpha*a2 == b1 + alpha*b2 for alpha.
                        if (a.score2 == b.score2)
                        {
                            continue; // the two lines never cross (or coincide)
                        }

                        crossing = (a.score1 - b.score1) / (b.score2 - a.score2);
                        keep = alphaPos
                            ? (crossing >= 0.0 && crossing <= maxStep)
                            : (crossing <= 0.0 && crossing >= -maxStep);
                    }

                    if (!keep)
                    {
                        continue;
                    }

                    lastPair = new PairRankedItems(a, b);
                    lastPair.alpha = crossing;
                }
            }

            return lastPair;
        }
Beispiel #3
0
        /// <summary>
        /// Adds a small random jitter to those items, and only those items, whose score duplicates that of their
        /// neighbour in sorted order, to reduce the number of coinciding crossing points as alpha sweeps.
        /// This helps a lot but is not sufficient on its own: with scores1 = {1,2,3} and scores2 = {3,2,1} all
        /// lines still meet in the middle (per the original author, that case is fixed in FindBestAlpha).
        /// Algorithmically the jitter is not needed; it is applied to significantly reduce the overall
        /// computational cost when the amount of degeneracy is high.
        ///
        /// Returns with rankedItems sorted by the (jittered) score1's.
        /// </summary>
        /// <param name="rankedItems">Items to jitter and sort in place; must not be null.</param>
        /// <param name="ran">Source of the random jitter.</param>
        public static void SortNJitterOld(RankedItem[] rankedItems, Random ran)
        {
            const double jitterScale = 1e-6;
            int count = rankedItems.Length;

            // Pass 1 - score2.  Sorting is only a means of bringing equal values next to each other, so the
            // array is not re-sorted after the jitter: pass 2 sorts on score1 anyway.
            for (int k = 0; k < count; ++k)
            {
                rankedItems[k].score = rankedItems[k].score2;
            }
            Array.Sort(rankedItems);
            for (int k = 0; k + 1 < count; ++k)
            {
                if (rankedItems[k].score2 == rankedItems[k + 1].score2)
                {
                    // Only the earlier element moves; its neighbour stays put for the next comparison.
                    rankedItems[k].score2 += jitterScale * (ran.NextDouble() - 0.5);
                }
            }

            // Pass 2 - score1.  Here the final order matters, so sort again if anything was perturbed.
            for (int k = 0; k < count; ++k)
            {
                rankedItems[k].score = rankedItems[k].score1;
            }
            Array.Sort(rankedItems);

            bool tiesBroken = false;
            for (int k = 0; k + 1 < count; ++k)
            {
                if (rankedItems[k].score1 == rankedItems[k + 1].score1)
                {
                    rankedItems[k].score1 += jitterScale * (ran.NextDouble() - 0.5);
                    rankedItems[k].score = rankedItems[k].score1; // keep the sort key in step with score1
                    tiesBroken = true;
                }
            }

            if (tiesBroken)
            {
                Array.Sort(rankedItems);
            }
        }
Beispiel #4
0
        /// <summary>
        /// This version jitters everything, in an attempt to prevent any degeneracy.  This also effectively picks
        /// a particular ranking for the NDCG baseline, if there is degeneracy.
        ///
        /// Every score1 and score2 is perturbed with a random factor r*1e-6, r drawn from [-1,1):
        /// a non-zero score s becomes s*(1 + r*1e-6); a score of exactly zero becomes (max - min)*r*1e-6, where
        /// max and min are taken over that score column before jittering (1.0 is used instead of max - min when
        /// all values in the column are equal).  Afterwards score is set to the jittered score1 and the array
        /// is sorted in place.
        ///
        /// The method itself returns nothing.  When all scores (before, after, or both) are the same the lines
        /// never cross; the original author's note says 1.0 should then be used as the optimal alpha, which is
        /// presumably handled by a caller - confirm.
        /// </summary>
        /// <param name="rankedItems">Items to jitter and sort in place; must not be null.</param>
        /// <param name="rand">Source of the random jitter.</param>
        public static void SortNJitter(RankedItem[] rankedItems, Random rand)
        {
            const double jitterScale = 1e-6;
            int count = rankedItems.Length;

            // Range of each score column, needed to give exact zeros a sensibly sized perturbation.
            double low1 = double.PositiveInfinity, high1 = double.NegativeInfinity;
            double low2 = double.PositiveInfinity, high2 = double.NegativeInfinity;
            for (int k = 0; k < count; ++k)
            {
                double s1 = rankedItems[k].score1;
                if (s1 > high1) { high1 = s1; }
                if (s1 < low1) { low1 = s1; }

                double s2 = rankedItems[k].score2;
                if (s2 > high2) { high2 = s2; }
                if (s2 < low2) { low2 = s2; }
            }

            // A very degenerate case (all values equal), but we must handle it: fall back to unit scale.
            double zeroScale1 = (high1 == low1) ? 1.0 : (high1 - low1);
            double zeroScale2 = (high2 == low2) ? 1.0 : (high2 - low2);

            for (int k = 0; k < count; ++k)
            {
                // One random draw for score1 followed by one for score2, per item.
                double s1 = rankedItems[k].score1;
                double noise1 = (2.0 * rand.NextDouble() - 1.0) * jitterScale;
                rankedItems[k].score1 = (s1 == 0.0) ? zeroScale1 * noise1 : s1 * (1.0 + noise1);

                double s2 = rankedItems[k].score2;
                double noise2 = (2.0 * rand.NextDouble() - 1.0) * jitterScale;
                rankedItems[k].score2 = (s2 == 0.0) ? zeroScale2 * noise2 : s2 * (1.0 + noise2);

                // The sort key is the jittered score1.
                rankedItems[k].score = rankedItems[k].score1;
            }

            Array.Sort(rankedItems);
        }
Beispiel #5
0
 /// <summary>
 /// Writes each item's zero-based array position into its rank field.
 /// </summary>
 /// <param name="rankedItems">Items to number in place; must not be null.</param>
 public static void FillRanks(RankedItem[] rankedItems)
 {
     int position = 0;
     while (position < rankedItems.Length)
     {
         rankedItems[position].rank = position;
         ++position;
     }
 }
Beispiel #6
0
        /// <summary>
        /// Builds one RankedItem per url from two scorings of the same query, then jitters and sorts them with
        /// SortNJitter.  query1 and query2 must be the same query (but with different scores).  The urls must
        /// be in the same order.  However they need not be sorted.
        /// </summary>
        /// <param name="query1">Supplies the length, query ID, MaxNonTruncDCG, labels and the first score of each item.</param>
        /// <param name="query2">Supplies the second score of each item; its length, query ID and labels must match query1.</param>
        /// <param name="scorer">Not used by this method; gains are read from the static DCGScorer.scoresMap.</param>
        /// <param name="ran">Random source handed to SortNJitter.</param>
        /// <returns>Null if this query has zero MaxNonTruncDCG.  Else, a RankedItem array as left by SortNJitter.</returns>
        /// <exception cref="ArgumentException">The two queries differ in length, query ID, or in any label.</exception>
        public static RankedItem[] FillRankedItems(Query query1, Query query2, DCGScorer scorer, Random ran)
        {
            if(query1.Length != query2.Length)
                throw new ArgumentException("Query length mismatch.");
            if(query1.QueryID != query2.QueryID)
                throw new ArgumentException("Queries have differnt IDs.");

            int length = query1.Length;
            double maxDCG = query1.MaxNonTruncDCG;
            if(maxDCG == 0.0)
                return null; // gains below are divided by maxDCG, so such a query cannot be normalized

            RankedItem[] rankedItems = new RankedItem[length];
            double[] scores1 = query1.scores;
            double[] scores2 = query2.scores;
            for(int i = 0; i < length; ++i)
            {
                float label = query1.Labels[i];
                if(label != query2.Labels[i])
                    throw new ArgumentException("FillRankedItems: label mismatch.");

                // First argument: the label's gain from scoresMap, normalized by the query's max DCG.
                rankedItems[i] = new RankedItem((double)DCGScorer.scoresMap[(int)label] / maxDCG, scores1[i], scores2[i], label);
            }

            // rankedItems was just allocated and can never be null here, so no null check is needed.
            SortNJitter(rankedItems, ran);
            return rankedItems;
        }
Beispiel #7
0
 /// <summary>
 /// Compares by score in descending order, so that sorting puts the highest score first.
 /// </summary>
 /// <param name="other">The item to compare against.</param>
 /// <returns>-1 if this item's score is greater than other's, 1 if it is smaller, 0 otherwise.</returns>
 public int CompareTo(RankedItem other)
 {
     if (score > other.score)
     {
         return -1;
     }

     if (score < other.score)
     {
         return 1;
     }

     return 0;
 }