public PairRankedItems previous; // The pair that was constructed immediately before this one (linked-list link).
#endregion Fields

#region Constructors
/// <summary>
/// Creates a pair from two ranked items and pushes it onto the front of the
/// linked list of pairs: the new instance remembers the former list head in
/// <see cref="previous"/> and then becomes the head itself.
/// </summary>
/// <param name="r1">Stored as the first item of the pair.</param>
/// <param name="r2">Stored as the second item of the pair.</param>
public PairRankedItems(RankedItem r1, RankedItem r2)
{
    item1 = r1;
    item2 = r2;

    // mostRecent is declared outside this excerpt; it tracks the last pair built.
    PairRankedItems formerHead = mostRecent;
    previous = formerHead;
    mostRecent = this;
}
/// <summary>
/// Visits every unordered pair of items, computes the alpha at which the two items'
/// combined scores become equal, and constructs a <see cref="PairRankedItems"/> for each
/// pair whose alpha lies in the accepted range. Items are assumed to be ranked already.
///
/// Two combinations are supported:
///   convex:     (1 - alpha) * score1 + alpha * score2, alpha accepted in [0, 1];
///   non-convex: score1 + alpha * score2, alpha accepted in [0, maxStep] or [-maxStep, 0].
/// The convex version is equivalent to the non-convex one up to a rescaling by 1/(1-alpha),
/// but the non-convex version makes the step size easy to control and leaves the old
/// weights unchanged. See boostingNotes.docx.
/// </summary>
/// <param name="rankedItems">The items to pair up; read only.</param>
/// <param name="convex">True to use the convex combination, false for the non-convex one.</param>
/// <param name="alphaPos">Non-convex version only: true accepts alpha in [0, maxStep],
/// false accepts alpha in [-maxStep, 0].</param>
/// <param name="maxStep">Max step to use for the non-convex version.</param>
/// <param name="ctr">Not read or modified by this method.</param>
/// <returns>The last pair constructed, or null if no pair was accepted. Earlier pairs are
/// reachable through the link that the PairRankedItems constructor sets up.</returns>
public static PairRankedItems FillPairRankedItems(RankedItem[] rankedItems, bool convex, bool alphaPos, double maxStep, ref int ctr)
{
    PairRankedItems lastPair = null;
    int count = rankedItems.Length;

    for (int first = 0; first + 1 < count; ++first)
    {
        RankedItem x = rankedItems[first];
        for (int second = first + 1; second < count; ++second)
        {
            RankedItem y = rankedItems[second];
            double alpha;
            bool accepted;

            if (convex)
            {
                // Disadvantage of the convex form: previously computed weights keep changing.
                double denominator = x.score2 - x.score1 + y.score1 - y.score2;
                if (denominator == 0)
                {
                    continue; // the two score lines are parallel and never cross
                }

                // Note ranks go inversely with score.
                alpha = (y.score1 - x.score1) / denominator;
                accepted = alpha >= 0.0 && alpha <= 1.0;
            }
            else
            {
                // Advantage of the non-convex form: previously computed weights stay the same.
                if (x.score2 == y.score2)
                {
                    continue; // identical slopes, so no crossing point
                }

                alpha = (x.score1 - y.score1) / (y.score2 - x.score2);

                // alpha = 0 corresponds to the original rank order in both versions.
                accepted = alphaPos
                    ? (alpha >= 0.0 && alpha <= maxStep)
                    : (alpha <= 0.0 && alpha >= -maxStep);
            }

            if (accepted)
            {
                lastPair = new PairRankedItems(x, y);
                lastPair.alpha = alpha;
            }
        }
    }

    return lastPair;
}
/// <summary>
/// Adds a small random jitter only to items whose score2 (and then score1) equals that of
/// its neighbour in sorted order, to reduce the number of coincident crossing points as
/// alpha sweeps. This helps a lot but is not sufficient: with scores1 = {1,2,3} and
/// scores2 = {3,2,1} all lines still meet in the middle, which is handled in FindBestAlpha.
/// The jitter is not needed algorithmically; it is here to cut the computational cost when
/// the amount of degeneracy is high.
///
/// On return, rankedItems has been sorted with the sort key (score) set to the
/// (jittered) score1 values.
/// </summary>
/// <param name="rankedItems">Items to jitter and sort in place.</param>
/// <param name="ran">Source of the random jitter.</param>
public static void SortNJitterOld(RankedItem[] rankedItems, Random ran)
{
    const double jitterScale = 1e-6;
    int count = rankedItems.Length;

    // Pass 1: sort on score2 so equal values become adjacent, then perturb them.
    // No re-sort is needed afterwards because the array is about to be sorted on score1.
    for (int k = 0; k < count; ++k)
    {
        rankedItems[k].score = rankedItems[k].score2;
    }
    Array.Sort(rankedItems);
    for (int k = 1; k < count; ++k)
    {
        if (rankedItems[k - 1].score2 == rankedItems[k].score2)
        {
            // Perturb the earlier element; leave element k alone for the next comparison.
            rankedItems[k - 1].score2 += jitterScale * (ran.NextDouble() - 0.5);
        }
    }

    // Pass 2: same treatment for score1, which is also the final sort key.
    for (int k = 0; k < count; ++k)
    {
        rankedItems[k].score = rankedItems[k].score1;
    }
    Array.Sort(rankedItems);

    bool keyChanged = false;
    for (int k = 1; k < count; ++k)
    {
        if (rankedItems[k - 1].score1 == rankedItems[k].score1)
        {
            rankedItems[k - 1].score1 += jitterScale * (ran.NextDouble() - 0.5);
            rankedItems[k - 1].score = rankedItems[k - 1].score1;
            keyChanged = true;
        }
    }

    // Jittering score1 altered the sort key, so the order may no longer be valid.
    if (keyChanged)
    {
        Array.Sort(rankedItems);
    }
}
/// <summary>
/// Jitters every score1 and score2, in an attempt to prevent any degeneracy, then sorts the
/// items in place using the jittered score1 as the sort key.
///
/// A non-zero value v becomes v * (1 + r), where r is drawn uniformly from [-1e-6, 1e-6).
/// A value of exactly zero cannot be jittered multiplicatively, so it becomes m * r, where
/// m is the (max - min) range of that score over all items, or 1.0 if every value is equal.
///
/// Because ties are broken randomly, this also effectively picks one particular ordering
/// among items whose scores were originally identical.
/// </summary>
/// <param name="rankedItems">Items to jitter and sort in place.</param>
/// <param name="rand">Source of the random jitter; two values are drawn per item
/// (first for score1, then for score2).</param>
public static void SortNJitter(RankedItem[] rankedItems, Random rand)
{
    const double scale = 1e-6;

    // Find the spread of each score; it sets the jitter magnitude for zero-valued scores.
    double low1 = double.PositiveInfinity, high1 = double.NegativeInfinity;
    double low2 = double.PositiveInfinity, high2 = double.NegativeInfinity;
    for (int idx = 0; idx < rankedItems.Length; ++idx)
    {
        double s1 = rankedItems[idx].score1;
        if (s1 > high1) { high1 = s1; }
        if (s1 < low1) { low1 = s1; }

        double s2 = rankedItems[idx].score2;
        if (s2 > high2) { high2 = s2; }
        if (s2 < low2) { low2 = s2; }
    }

    // A very degenerate case (all values equal), but we must handle it: fall back to 1.0.
    double spread1 = (high1 == low1) ? 1.0 : (high1 - low1);
    double spread2 = (high2 == low2) ? 1.0 : (high2 - low2);

    for (int idx = 0; idx < rankedItems.Length; ++idx)
    {
        double original1 = rankedItems[idx].score1;
        double noise1 = (2.0 * rand.NextDouble() - 1.0) * scale;
        rankedItems[idx].score1 = (original1 == 0.0)
            ? spread1 * noise1
            : original1 * (1.0 + noise1);

        double original2 = rankedItems[idx].score2;
        double noise2 = (2.0 * rand.NextDouble() - 1.0) * scale;
        rankedItems[idx].score2 = (original2 == 0.0)
            ? spread2 * noise2
            : original2 * (1.0 + noise2);

        // The sort key is the jittered score1.
        rankedItems[idx].score = rankedItems[idx].score1;
    }

    Array.Sort(rankedItems);
}
/// <summary>
/// Assigns each item its current zero-based array position as its rank.
/// </summary>
/// <param name="rankedItems">Items whose rank field is overwritten in place.</param>
public static void FillRanks(RankedItem[] rankedItems)
{
    int position = 0;
    while (position < rankedItems.Length)
    {
        rankedItems[position].rank = position;
        ++position;
    }
}
/// <summary>
/// Builds one <see cref="RankedItem"/> per url from two scorings of the same query, then
/// jitters and sorts the result with <see cref="SortNJitter"/>.
/// query1 and query2 must be the same query (but with different scores). The urls must be
/// in the same order in both; they need not be sorted.
/// </summary>
/// <param name="query1">Supplies the labels, the max DCG normaliser and the first set of scores.</param>
/// <param name="query2">Supplies the second set of scores; its labels must match query1's.</param>
/// <param name="scorer">Not used by this method; gains are read from the static DCGScorer.scoresMap.</param>
/// <param name="ran">Random source passed on to SortNJitter.</param>
/// <returns>Null if this query has zero max (non-truncated) DCG. Otherwise a RankedItem array
/// as left by SortNJitter, i.e. jittered and sorted on the jittered query1 scores.</returns>
/// <exception cref="ArgumentException">The queries differ in length, query ID, or in the
/// label of any url.</exception>
public static RankedItem[] FillRankedItems(Query query1, Query query2, DCGScorer scorer, Random ran)
{
    if (query1.Length != query2.Length)
    {
        throw new ArgumentException("Query length mismatch.");
    }
    if (query1.QueryID != query2.QueryID)
    {
        throw new ArgumentException("Queries have different IDs.");
    }

    // A query with zero max DCG yields no ranked items at all.
    double maxDCG = query1.MaxNonTruncDCG;
    if (maxDCG == 0.0)
    {
        return null;
    }

    int length = query1.Length;
    double[] scores1 = query1.scores;
    double[] scores2 = query2.scores;
    RankedItem[] rankedItems = new RankedItem[length];

    for (int i = 0; i < length; ++i)
    {
        float label = query1.Labels[i];
        if (label != query2.Labels[i])
        {
            throw new ArgumentException("FillRankedItems: label mismatch.");
        }

        // The first constructor argument is the label's gain normalised by the query's max DCG.
        rankedItems[i] = new RankedItem((double)DCGScorer.scoresMap[(int)label] / maxDCG, scores1[i], scores2[i], label);
    }

    SortNJitter(rankedItems, ran);
    return rankedItems;
}
/// <summary>
/// Orders items by descending score: an item with a higher score compares as smaller,
/// so it comes first in an ascending sort.
/// </summary>
/// <param name="other">The item to compare against.</param>
/// <returns>-1 if this score is greater than other's, 1 if it is less, and 0 when
/// neither comparison holds (e.g. the scores are equal).</returns>
public int CompareTo(RankedItem other)
{
    if (score > other.score)
    {
        return -1;
    }
    if (score < other.score)
    {
        return 1;
    }
    return 0;
}