Пример #1
0
        /// <summary>
        /// Partitions the data points of <paramref name="labelFeatureCore"/> into groups, where a
        /// group is a run of consecutive data points that share the same group ID.
        /// Requirements:
        /// (1) all the data points are indexed from 0 ... NumDataPoint - 1
        /// (2) all data points belonging to one group are contiguous
        /// An input with no data points produces zero groups.
        /// </summary>
        /// <param name="labelFeatureCore">provides the number of data points and maps a data point to its group ID</param>
        public DataGroups(LabelFeatureCore labelFeatureCore)
        {
            NumDataPoints = labelFeatureCore.NumDataPoint;

            List<int> listGroupId = new List<int>();
            List<int> listStartIdx = new List<int>();

            // Only ask for group IDs of points that exist: with no data points there is no
            // point 0 to query and no group to record.
            if (NumDataPoints > 0)
            {
                int curID = labelFeatureCore.GetGroupId(0);
                listGroupId.Add(curID);
                listStartIdx.Add(0);

                // Point 0 already opened the first group, so the scan starts at 1.
                for (int i = 1; i < NumDataPoints; i++)
                {
                    int nextID = labelFeatureCore.GetGroupId(i);
                    if (nextID != curID)
                    {
                        // A change of ID marks the first point of a new group.
                        listStartIdx.Add(i);
                        listGroupId.Add(nextID);
                        curID = nextID;
                    }
                }
            }

            groupCounts = listStartIdx.Count;
            dataGroups = new DataGroup[groupCounts];

            for (int i = 0; i < groupCounts; i++)
            {
                // A group ends where the next one starts; the last group ends at the last data point.
                int endIdx = (i < groupCounts - 1) ? listStartIdx[i + 1] : NumDataPoints;

                dataGroups[i] = new DataGroup();
                dataGroups[i].id = listGroupId[i];
                dataGroups[i].iStart = listStartIdx[i];
                dataGroups[i].cSize = endIdx - listStartIdx[i];
            }
        }
Пример #2
0
 /// <summary>
 /// Translates the two indices of <paramref name="rankPair"/> into positions relative to the
 /// start of <paramref name="queryGroup"/> and forwards them to <c>query.AbsDeltaNDCG</c>.
 /// </summary>
 /// <param name="rankPair">pair whose IdxL and IdxH are offset by the group's start index</param>
 /// <param name="queryGroup">group whose iStart is subtracted from both indices</param>
 /// <param name="query">query object that computes the value from the two relative indices</param>
 /// <returns>the value returned by <c>query.AbsDeltaNDCG</c> for the two relative indices</returns>
 private float AbsDeltaNDCG(RankPair rankPair, DataGroup queryGroup, Query query)
 {
     int groupStart = queryGroup.iStart;
     return query.AbsDeltaNDCG(rankPair.IdxL - groupStart, rankPair.IdxH - groupStart);
 }
Пример #3
0
        /// <summary>
        /// Builds a <c>Query</c> from the slice of <paramref name="inLabels"/> and
        /// <paramref name="inScores"/> that belongs to <paramref name="queryGroup"/>.
        /// Also assigns this instance's truncLevel to the static <c>DCGScorer.truncLevel</c>
        /// before the query is constructed.
        /// </summary>
        /// <param name="queryGroup">group giving the slice start (iStart), its length (cSize) and the query ID</param>
        /// <param name="inLabels">labels of all data points</param>
        /// <param name="inScores">scores of all data points; copied into a double array</param>
        /// <param name="labelForUnlabeled">passed through to the <c>Query</c> constructor</param>
        /// <param name="scoreForDegenerateQuery">passed through to the <c>Query</c> constructor</param>
        /// <returns>the newly constructed query</returns>
        private Query CreateQuery(DataGroup queryGroup, float[] inLabels, float[] inScores,
                                  float labelForUnlabeled, double scoreForDegenerateQuery)
        {
            int count = queryGroup.cSize;
            int first = queryGroup.iStart;

            float[] labels = new float[count];
            double[] scores = new double[count];
            for (int offset = 0; offset < count; offset++)
            {
                int source = first + offset;
                labels[offset] = inLabels[source];
                scores[offset] = inScores[source];
            }

            DCGScorer.truncLevel = this.truncLevel;
            return new Query(queryGroup.id.ToString(), labels, null, scores, labelForUnlabeled, scoreForDegenerateQuery);
        }
Пример #4
0
        /// <summary>
        /// Copies the labels of one query into a scratch array, asks <c>dcgScorer</c> for the
        /// maximum truncated DCG of those labels, and reports whether every label in the query
        /// equals the first one.
        /// </summary>
        /// <param name="query">group giving the start index (iStart) and size (cSize) of the query's slice</param>
        /// <param name="labels">labels of all data points</param>
        /// <param name="maxTruncDCG">receives the value computed by <c>dcgScorer.ComputeMaxTruncDCG</c></param>
        /// <returns>true when no label in the slice differs from the slice's first label</returns>
        protected bool ComputeMaxTruncDCG(DataGroup query, float[] labels, out double maxTruncDCG)
        {
            // REVIEW: This should be done once per query, in the query constructor
            int count = query.cSize;
            int start = query.iStart;

            float[] queryLabels = new float[count];
            float reference = labels[start];
            bool sawDifferentLabel = false;
            for (int offset = 0; offset < count; offset++)
            {
                float current = labels[start + offset];
                if (current != reference)
                {
                    sawDifferentLabel = true;
                }
                queryLabels[offset] = current;
            }

            maxTruncDCG = dcgScorer.ComputeMaxTruncDCG(queryLabels);
            return !sawDifferentLabel;
        }
Пример #5
0
        /// <summary>
        /// Computes three truncated DCG values for one query by delegating to <c>dcgScorer</c>:
        /// the mean variant, the pessimistic variant and the optimistic (non-pessimistic) variant.
        /// </summary>
        /// <param name="query">group giving the start index (iStart) and size (cSize) of the query's slice</param>
        /// <param name="labels">labels of all data points</param>
        /// <param name="scores">scores of all data points; the slice is widened to double</param>
        /// <param name="meanTruncDCG">receives the truncated result of <c>ComputeMeanDCGs</c></param>
        /// <param name="pessTruncDCG">receives the truncated result of <c>ComputeDCGs</c> called with pessimistic = true</param>
        /// <param name="optiTruncDCG">receives the truncated result of <c>ComputeDCGs</c> called with pessimistic = false</param>
        protected void ComputeTruncDCGs(DataGroup query, float[] labels, float[] scores, out double meanTruncDCG, out double pessTruncDCG, out double optiTruncDCG)
        {
            int count = query.cSize;
            int first = query.iStart;

            // REVIEW: these buffers should be allocated once, in the query object
            double[] queryScores = new double[count];
            Array.Copy(scores, first, queryScores, 0, count);
            float[] queryLabels = new float[count];
            Array.Copy(labels, first, queryLabels, 0, count);

            // The non-truncated values are required by the scorer's signatures but not used here.
            double unusedNonTruncDCG;
            dcgScorer.ComputeMeanDCGs(queryScores, queryLabels, out meanTruncDCG, out unusedNonTruncDCG);
            dcgScorer.ComputeDCGs(true, queryScores, queryLabels, out pessTruncDCG, out unusedNonTruncDCG);
            dcgScorer.ComputeDCGs(false, queryScores, queryLabels, out optiTruncDCG, out unusedNonTruncDCG);
        }
Пример #6
0
        /// <summary>
        /// Computes the mean, pessimistic and optimistic truncated DCG of one query and scales each
        /// by the reciprocal of the query's maximum truncated DCG. For an empty query (as reported
        /// by <c>ComputeMaxTruncDCG</c>) all three outputs receive a placeholder instead.
        /// </summary>
        /// <param name="query">group describing the query's slice of the data</param>
        /// <param name="labels">labels of all data points</param>
        /// <param name="scores">scores of all data points</param>
        /// <param name="meanTruncNDCG">receives the scaled mean truncated DCG, or the placeholder</param>
        /// <param name="pessTruncNDCG">receives the scaled pessimistic truncated DCG, or the placeholder</param>
        /// <param name="optiTruncNDCG">receives the scaled optimistic truncated DCG, or the placeholder</param>
        /// <returns>true when the query is empty, false otherwise</returns>
        public bool ComputeNDCGs(DataGroup query, float[] labels, float[] scores, out double meanTruncNDCG, out double pessTruncNDCG, out double optiTruncNDCG)
        {
            double meanDcg, pessDcg, optiDcg;
            ComputeTruncDCGs(query, labels, scores, out meanDcg, out pessDcg, out optiDcg);

            // REVIEW: this should be done once, in a query constructor
            double idealDcg;
            bool isEmpty = ComputeMaxTruncDCG(query, labels, out idealDcg);

            if (!isEmpty)
            {
                // idealDcg cannot be zero for non-empty queries, unless more than one label is assigned zero gain
                double scale = 1.0 / idealDcg;
                meanTruncNDCG = meanDcg * scale;
                pessTruncNDCG = pessDcg * scale;
                optiTruncNDCG = optiDcg * scale;
                return false;
            }

            // Empty queries that are dropped must be taken care of in the calling code;
            // the out parameters still have to be assigned something here.
            double placeholder = dropEmptyQueries ? double.NegativeInfinity : scoreForEmptyQuery;
            meanTruncNDCG = placeholder;
            pessTruncNDCG = placeholder;
            optiTruncNDCG = placeholder;
            return true;
        }
Пример #7
0
 /// <summary>
 /// Creates a generator bound to one data group and a rating array.
 /// </summary>
 /// <param name="dataGroup">group stored on this instance</param>
 /// <param name="rating">rating array stored on this instance; the reference is kept, the array is not copied</param>
 public RankPairGenerator(DataGroup dataGroup, float[] rating)
 {
     this.rating = rating;
     this.dataGroup = dataGroup;
 }
Пример #8
0
        /// <summary>
        /// Computes the largest DCG achievable for one query: the query's labels are sorted and
        /// the gains of the highest labels are accumulated, best first, up to
        /// <paramref name="ndcgAt"/> positions.
        /// </summary>
        /// <param name="query">group giving the start index (iStart) and size (cSize) of the query's slice</param>
        /// <param name="labels">labels of all data points; negative labels are treated as 0, others are truncated to int and used as indices into scoresMap</param>
        /// <param name="scores">not used by this method</param>
        /// <param name="ndcgAt">maximum number of positions to accumulate</param>
        /// <returns>sum of scoresMap[label] * rankCoeffs[position] over the considered positions</returns>
        protected float ComputeMaxDCG(DataGroup query, float[] labels, float[] scores, int ndcgAt)
        {
            int count = query.cSize;

            float[] sortedLabels = new float[count];
            Array.Copy(labels, query.iStart, sortedLabels, 0, count);
            Array.Sort(sortedLabels);

            int depth = Math.Min(ndcgAt, count);
            float total = 0.0F;

            // The sort is ascending, so walk backwards from the end to visit the highest labels first.
            for (int rank = 0, pos = count - 1; rank < depth; rank++, pos--)
            {
                float current = sortedLabels[pos];
                int gainIndex = (int)(current < 0 ? 0 : current);
                total += scoresMap[gainIndex] * rankCoeffs[rank];
            }
            return total;
        }
Пример #9
0
        /// <summary>
        /// Computes the DCG of one query: the query's documents are ordered by score and the gains
        /// of the highest-scored documents are accumulated, best first, up to
        /// <paramref name="ndcgAt"/> positions.
        /// </summary>
        /// <param name="query">group giving the start index (iStart) and size (cSize) of the query's slice</param>
        /// <param name="labels">labels of all data points; each is truncated to int, negative values are treated as 0, and the result indexes scoresMap</param>
        /// <param name="scores">scores of all data points, used as the sort key</param>
        /// <param name="ndcgAt">maximum number of positions to accumulate</param>
        /// <returns>sum of scoresMap[label] * rankCoeffs[position] over the considered positions</returns>
        protected float ComputeDCG(DataGroup query, float[] labels, float[] scores, int ndcgAt)
        {
            float dcg = 0.0F;
            int last = Math.Min(ndcgAt, query.cSize);
            float[] workScores = new float[query.cSize];
            Array.Copy(scores, query.iStart, workScores, 0, query.cSize);

            // Array.Copy only performs widening conversions between primitive arrays; copying
            // float[] into int[] throws ArrayTypeMismatchException. Convert element by element.
            int[] workLabels = new int[query.cSize];
            for (int i = 0; i < query.cSize; i++)
            {
                workLabels[i] = (int)labels[query.iStart + i];
            }

            // Sort the labels by ascending score; the best-scored documents end up at the back.
            Array.Sort(workScores, workLabels);

            for (int j = 0; j < last; j++)
            {
                int label = workLabels[query.cSize - j - 1];
                if (label < 0)
                {
                    label = 0;
                }
                dcg += scoresMap[label] * rankCoeffs[j];
            }
            return dcg;
        }
Пример #10
0
        /// <summary>
        /// Computes the NDCG of one query as the ratio of its DCG to its maximum DCG, both
        /// evaluated at <paramref name="ndcgAt"/>. When the maximum DCG is zero a fallback value
        /// is returned instead of dividing.
        /// </summary>
        /// <param name="query">group describing the query's slice of the data</param>
        /// <param name="labels">labels of all data points</param>
        /// <param name="scores">scores of all data points</param>
        /// <param name="ndcgAt">passed through to <c>ComputeDCG</c> and <c>ComputeMaxDCG</c></param>
        /// <returns>
        /// DCG divided by maximum DCG; if the maximum DCG is zero, 0 when dropEmptyQueries is set
        /// and scoreForEmptyQuery otherwise
        /// </returns>
        public float ComputeNDCG(DataGroup query, float[] labels, float[] scores, int ndcgAt)
        {
            float actual = ComputeDCG(query, labels, scores, ndcgAt);
            float ideal = ComputeMaxDCG(query, labels, scores, ndcgAt);

            if (ideal != 0.0)
            {
                return actual / ideal;
            }

            // This is a bug: if we dropEmptyQueries, we don't count them at all
            return dropEmptyQueries ? 0.0F : scoreForEmptyQuery;
        }