Пример #1
0
        /// <summary>
        /// Get the optimal metric value and its corresponding iteration ID
        /// </summary>
        /// <param name="dataType">in: the data type we are interested in (train/valid/test)</param>
        /// <param name="result">out: the actual value of the metric</param>
        /// <returns>the iteration Index/ID that produces the minimum of the metric on dataType,
        /// or -1 when no results have been recorded</returns>
        public override int GetBest(DataPartitionType dataType, ref float result)
        {
            int idMin = -1;
            if (results_list.Count > 0)
            {
                result = results_list[0].Metrics[(int)dataType][0];
                idMin = results_list[0].Id;

                // Scan every recorded result for the smallest metric value.
                for (int i = 0; i < results_list.Count; i++)
                {
                    if (result > results_list[i].Metrics[(int)dataType][0])
                    {
                        result = results_list[i].Metrics[(int)dataType][0];
                        // BUG FIX: previously assigned the loop index 'i' here, which is
                        // inconsistent with the initialization above that returns the stored
                        // iteration Id. Return the Id of the best result instead.
                        idMin = results_list[i].Id;
                    }
                }
            }
            return idMin;
        }
Пример #2
0
 /// <summary>
 /// Compute the metric values for the given predictions on one data partition.
 /// </summary>
 /// <param name="labels">ground-truth labels, one per data point</param>
 /// <param name="classprob">per-output predictions; implementations index this as [output][dataPoint]</param>
 /// <param name="dataType">the data partition (train/valid/test) to evaluate</param>
 /// <param name="metrics">out: filled with the computed metric values by the implementation</param>
 protected abstract void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics);
Пример #3
0
 /// <summary>
 /// Get the optimal metric value and its corresponding iteration ID.
 /// </summary>
 /// <param name="dataType">in: the data type we are interested in</param>
 /// <param name="result">out: the actual value of the optimal metric</param>
 /// <returns>the iteration Index/ID that produces the optimal metric on dataType,
 /// or -1 when it cannot be determined</returns>
 public abstract int GetBest(DataPartitionType dataType, ref float result);
Пример #4
0
        /// <summary>
        /// Build a Metrics object: snapshot the (optionally converted) labels, allocate
        /// per-partition metric buffers, and cache the data indices of each requested partition.
        /// </summary>
        /// <param name="labelFeatureCore">source of labels, data points, and data partitions</param>
        /// <param name="labelConvert">optional label converter; null means raw labels are used as-is</param>
        /// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
        public Metrics(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes)
        {
            this.labelFeatureCore = labelFeatureCore;
            this.dataTypes = dataTypes;
            this.optimalID = -1;

            int cDataPoints = labelFeatureCore.NumDataPoint;
            this.labels = new float[cDataPoints];

            // Snapshot the labels once, converting each one if a converter was supplied.
            for (int i = 0; i < cDataPoints; i++)
            {
                float rawLabel = labelFeatureCore.GetLabel(i);
                this.labels[i] = (labelConvert != null) ? labelConvert.convert(rawLabel) : rawLabel;
            }

            int cPartitions = (int)DataPartitionType.cTypes;
            this.dataSegments = new int[cPartitions][];

            this.metricsCur = new float[cPartitions][];
            for (int i = 0; i < cPartitions; i++)
            {
                metricsCur[i] = new float[SIZE];
            }

            // Cache the data index of each requested partition; a partition without data
            // keeps a null segment (missing sections are treated as empty downstream).
            foreach (DataPartitionType dataType in dataTypes)
            {
                DataSet dataSet = labelFeatureCore.DataGroups.GetDataPartition(dataType);
                int[] dataSegment = dataSet.DataIndex;
                if (dataSegment != null)
                {
                    dataSegments[(int)dataType] = dataSegment;
                }
            }

            this.results_list = new List<Result>();
        }
Пример #5
0
        /// <summary>
        /// Retrieve the data partition specified before, as a DataSet over (optionally
        /// sub-sampled) groups.
        /// </summary>
        /// <param name="pType">the data partition type to retrieve</param>
        /// <param name="subSamplePercent">the fraction of the partition's groups to return (1.0 = all)</param>
        /// <param name="r">random source for sampling; null selects the first groups deterministically</param>
        /// <returns>a DataSet over the selected groups, or null if no partition table exists
        /// or pType is out of range</returns>
        public DataSet GetDataPartition(DataPartitionType pType, float subSamplePercent, Random r)
        {
            if (groupPartitionIndexTbl == null || pType >= DataPartitionType.cTypes)
            {
                return null;
            }

            int[] groupIndex = this.groupPartitionIndexTbl[(int)pType];
            int cGroups = (int)((float)groupIndex.Length * subSamplePercent);

            // Choose which positions of groupIndex to keep: a random sample when a
            // Random is supplied, otherwise simply the first cGroups positions.
            int[] sampleGroupIndex = (r != null)
                ? Vector.RandomSample(groupIndex.Length, cGroups, r)
                : Vector.IndexArray(cGroups);

            // Map the sampled positions back to actual group indices (in place).
            for (int i = 0; i < cGroups; i++)
            {
                sampleGroupIndex[i] = groupIndex[sampleGroupIndex[i]];
            }

            return new DataSet(this, sampleGroupIndex);
        }
Пример #6
0
 /// <summary>
 /// Construct a classification-error metric over the given data partitions.
 /// </summary>
 //dataType==null <=> all the data are used in one partition
 public ClassError(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes)
     : base(labelFeatureCore, labelConvert, dataTypes)
 {
 }
Пример #7
0
        /// <summary>
        /// Compute precision, recall, and overall error rate from binary class probabilities,
        /// using a 0.5 decision threshold, over the given data partition.
        /// </summary>
        /// <param name="labels">ground-truth labels (0/1) per data point</param>
        /// <param name="classprob">predicted class probabilities, indexed [class][dataPoint]</param>
        /// <param name="dataType">the partition to evaluate</param>
        /// <param name="metrics">out: [PRECESSION], [RECALL], and [2] = overall error rate</param>
        protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
        {
            int[] predict = ConvertClassProbToLabels(classprob, 0.5F);
            int[] index = dataSegments[(int)dataType];

            // Reset the 2x2 confusion matrix: stats[label, prediction].
            for (int i = 0; i < 2; i++)
            {
                for (int j = 0; j < 2; j++)
                {
                    stats[i, j] = 0;
                }
            }

            if (index != null)
            {
                for (int i = 0; i < index.Length; i++)
                {
                    stats[(int)labels[index[i]],predict[index[i]]]++;
                }
            }
            else
            {
                // BUG FIX: this branch previously iterated up to index.Length even though
                // index is null here (guaranteed NullReferenceException); iterate over the
                // full prediction array instead, matching the sibling metric implementations.
                for (int i = 0; i < predict.Length; i++)
                {
                    stats[(int)labels[i],predict[i]]++;
                }
            }

            metrics[(int)PrecRecallType.PRECESSION] = (float)stats[1, 1] / (float)(stats[0, 1] + stats[1, 1]);
            metrics[(int)PrecRecallType.RECALL] = (float)stats[1, 1] / (float)(stats[1, 0] + stats[1, 1]);
            metrics[2] = (float)(stats[1, 0] + stats[0, 1]) / (float)(stats[0, 0] + stats[0, 1] + stats[1, 0] + stats[1, 1]);
        }
Пример #8
0
        /// <summary>
        /// Compute the classification error rate on the given data partition. Data points
        /// with a non-positive label that were predicted as class 0 are not counted as errors.
        /// </summary>
        /// <param name="labels">ground-truth labels per data point</param>
        /// <param name="classprob">predicted class probabilities, indexed [class][dataPoint]</param>
        /// <param name="dataType">the partition to evaluate</param>
        /// <param name="metrics">out: metrics[0] = error rate</param>
        protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
        {
            int[] predict = ConvertClassProbToLabels(classprob);
            int[] segment = dataSegments[(int)dataType];

            int cErrors = 0;
            double invCount = 1.0;
            if (segment != null)
            {
                // Evaluate only the data points belonging to this partition.
                foreach (int dataIdx in segment)
                {
                    if (labels[dataIdx] <= 0 && predict[dataIdx] == 0)
                        continue;
                    if (labels[dataIdx] != predict[dataIdx])
                        cErrors++;
                }
                invCount = 1.0 / ((double)segment.Length + float.Epsilon);
            }
            else
            {
                // No segment recorded for this partition: evaluate every data point.
                for (int i = 0; i < predict.Length; i++)
                {
                    if (labels[i] <= 0 && predict[i] == 0)
                        continue;
                    if (labels[i] != predict[i])
                        cErrors++;
                }
                invCount = 1.0 / (double)(predict.Length + float.Epsilon);
            }
            // float.Epsilon in the denominator guards against division by zero on empty partitions.
            metrics[0] = (float)(cErrors * invCount);
        }
Пример #9
0
 /// <summary>
 /// Construct a precision/recall metric; allocates the 2x2 confusion-matrix buffer.
 /// </summary>
 //dataType==null <=> all the data are used in one partition
 public PrecRecall(LabelFeatureData labelFeatureData, LabelConverter labelConvert, DataPartitionType[] dataTypes)
     : base(labelFeatureData, labelConvert, dataTypes)
 {
     this.stats = new int[2, 2];
 }
Пример #10
0
 /// <summary>
 /// Get the optimal metric value and its corresponding iteration ID.
 /// </summary>
 /// <param name="dataType">in: the data type we are interested in</param>
 /// <param name="result">out: the actual value of the metric (not modified here)</param>
 /// <returns>always -1: precision and recall are two competing values, so no single
 /// "best" iteration can be chosen automatically</returns>
 public override int GetBest(DataPartitionType dataType, ref float result)
 {
     //we cannot really compare precision/recall
     return -1;
 }
Пример #11
0
 /// <summary>
 /// Compute the mean absolute error (L1) between labels and predicted scores
 /// on the given data partition.
 /// </summary>
 /// <param name="labels">ground-truth labels per data point</param>
 /// <param name="scorePredict">predicted scores; scorePredict[0] holds the per-point scores</param>
 /// <param name="dataType">the partition to evaluate</param>
 /// <param name="metrics">out: metrics[0] = mean absolute error</param>
 protected override void ComputeMetrics(float[] labels, float[][] scorePredict, DataPartitionType dataType, float[] metrics)
 {
     float[] scores = scorePredict[0];
     int[] segment = dataSegments[(int)dataType];

     double totalAbsErr = 0;
     double invCount = 1.0;
     if (segment != null)
     {
         // Evaluate only the data points belonging to this partition.
         foreach (int dataIdx in segment)
         {
             double delta = labels[dataIdx] - scores[dataIdx];
             totalAbsErr += (delta > 0) ? delta : -delta;
         }
         invCount = 1.0 / ((double)segment.Length + float.Epsilon);
     }
     else
     {
         // No segment recorded for this partition: evaluate every data point.
         for (int i = 0; i < scores.Length; i++)
         {
             double delta = labels[i] - scores[i];
             totalAbsErr += (delta > 0) ? delta : -delta;
         }
         invCount = 1.0 / ((double)scores.Length + float.Epsilon);
     }
     // float.Epsilon in the denominator guards against division by zero on empty partitions.
     metrics[0] = (float)(totalAbsErr * invCount);
 }
Пример #12
0
        /// <summary>
        /// Compute pairwise ranking metrics (per-query-averaged pairwise error rate and
        /// cross-entropy) on the given data partition.
        /// </summary>
        /// <param name="labels">ground-truth relevance labels per data point</param>
        /// <param name="inScores">predicted scores; inScores[0] holds the per-point scores</param>
        /// <param name="dataType">the partition to evaluate</param>
        /// <param name="metrics">out: [PairCrossEnt] and [PairError], averaged over queries</param>
        protected override void ComputeMetrics(float[] labels, float[][] inScores, DataPartitionType dataType, float[] metrics)
        {
            //results initialization
            for (int i = 0; i < metrics.Length; i++)
            {
                metrics[i] = 0;
            }

            //(1) Compute pairwise error and cross-entropy, averaged per query
            DataSet dataSet = this.labelFeatureCore.DataGroups.GetDataPartition(dataType);
            int[] groupIndex = dataSet.GroupIndex;
            if (groupIndex != null && groupIndex.Length > 0)
            {
                float[] scores = inScores[0];

                double cQueries = 0;

                double totalErrRate = 0;
                double totalCrossEnt = 0;

                for (int i = 0; i < groupIndex.Length; i++)
                {
                    DataGroup query = this.labelFeatureCore.DataGroups[groupIndex[i]];
                    RankPairGenerator rankPairs = new RankPairGenerator(query, labels);
                    double cErr = 0;
                    double CrossEnt = 0;
                    double cPairs = 0;
                    foreach (RankPair rankPair in rankPairs)
                    {
                        float scoreH_minus_scoreL = scores[rankPair.IdxH] - scores[rankPair.IdxL];
                        CrossEnt += RankPair.CrossEntropy(scoreH_minus_scoreL);
                        // A pair is an error when the higher-labeled item is not scored strictly higher.
                        if (scoreH_minus_scoreL <= 0)
                        {
                            cErr++;
                        }
                        cPairs++;
                    }

                    if (cPairs > 0) // equivalent to !emptyQuery
                    {
                        totalErrRate += (cErr / cPairs);
                        totalCrossEnt += (CrossEnt / cPairs);
                        cQueries++;
                    }
                    else if (!ndcg.DropEmptyQueries)
                    {
                        // A kept empty query contributes zero error/cross-entropy
                        // but still counts toward the average.
                        cQueries++;
                    }
                }

                // BUG FIX: guard against division by zero (NaN metrics) when every
                // query was empty and empty queries are dropped; the metrics then
                // keep their initialized value of 0.
                if (cQueries > 0)
                {
                    metrics[(int)PairwiseType.PairCrossEnt] = (float)(totalCrossEnt / cQueries);
                    metrics[(int)PairwiseType.PairError] = (float)(totalErrRate / cQueries);
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Get the optimal metric value and its corresponding iteration ID,
        /// using the PairError column of the recorded metrics.
        /// </summary>
        /// <param name="dataType">in: the data type we are interested in</param>
        /// <param name="result">out: the actual value of the metric</param>
        /// <returns>the iteration Index/ID that produces the optimal PairError on dataType,
        /// or -1 when no results have been recorded</returns>
        public override int GetBest(DataPartitionType dataType, ref float result)
        {
            int idMax = -1;
            if (results_list.Count > 0)
            {
                int index = (int)PairwiseType.PairError;
                result = results_list[0].Metrics[(int)dataType][index];
                idMax = results_list[0].Id;

                for (int i = 0; i < results_list.Count; i++)
                {
                    // NOTE(review): this selects the MAXIMUM PairError; confirm that is
                    // intended, since an error metric is usually minimized.
                    if (result < results_list[i].Metrics[(int)dataType][index])
                    {
                        result = results_list[i].Metrics[(int)dataType][index];
                        // BUG FIX: previously stored the loop index 'i'; return the stored
                        // iteration Id, consistent with the initialization above.
                        idMax = results_list[i].Id;
                    }
                }
            }
            return idMax;
        }
Пример #14
0
        /// <summary>
        /// Get the optimal metric value and its corresponding iteration ID,
        /// using the meanTruncNDCG column of the recorded metrics (NDCG is maximized).
        /// </summary>
        /// <param name="dataType">in: the data type we are interested in</param>
        /// <param name="result">out: the actual value of the metric</param>
        /// <returns>the iteration Index/ID that produces the maximal meanTruncNDCG on dataType,
        /// or -1 when no results have been recorded</returns>
        public override int GetBest(DataPartitionType dataType, ref float result)
        {
            int idMax = -1;
            if (results_list.Count > 0)
            {
                // BUG FIX: the loop below previously read metric column [0] while the
                // initialization read [meanTruncNDCG] — two different columns. Use the
                // meanTruncNDCG column consistently throughout.
                int index = (int)NDCGType.meanTruncNDCG;
                result = results_list[0].Metrics[(int)dataType][index];
                idMax = results_list[0].Id;

                for (int i = 0; i < results_list.Count; i++)
                {
                    if (result < results_list[i].Metrics[(int)dataType][index])
                    {
                        result = results_list[i].Metrics[(int)dataType][index];
                        // BUG FIX: previously stored the loop index 'i'; return the stored
                        // iteration Id, consistent with the initialization above.
                        idMax = results_list[i].Id;
                    }
                }
            }
            return idMax;
        }
Пример #15
0
 /// <summary>
 /// Compute the root of the mean squared error between labels and predicted scores
 /// on the given data partition.
 /// </summary>
 /// <param name="labels">ground-truth labels per data point</param>
 /// <param name="scorePredict">predicted scores; scorePredict[0] holds the per-point scores</param>
 /// <param name="dataType">the partition to evaluate</param>
 /// <param name="metrics">out: metrics[0] = sqrt(mean squared error); the only metric computed here</param>
 protected override void ComputeMetrics(float[] labels, float[][] scorePredict, DataPartitionType dataType, float[] metrics)
 {
     float[] scores = scorePredict[0];
     int[] segment = dataSegments[(int)dataType];

     double sumSqErr = 0;
     double invCount = 1.0;
     if (segment != null)
     {
         // Evaluate only the data points belonging to this partition.
         foreach (int dataIdx in segment)
         {
             double delta = labels[dataIdx] - scores[dataIdx];
             sumSqErr += delta * delta;
         }
         invCount = 1.0 / ((double)segment.Length + float.Epsilon);
     }
     else
     {
         // No segment recorded for this partition: evaluate every data point.
         for (int i = 0; i < scores.Length; i++)
         {
             double delta = labels[i] - scores[i];
             sumSqErr += delta * delta;
         }
         invCount = 1.0 / ((double)scores.Length + float.Epsilon);
     }
     // float.Epsilon in the denominator guards against division by zero on empty partitions.
     metrics[0] = (float)Math.Sqrt(sumSqErr * invCount);
 }
Пример #16
0
        /// <summary>
        /// Compute classification error and truncated NDCG metrics on the given data partition.
        /// </summary>
        /// <param name="labels">ground-truth labels per data point</param>
        /// <param name="classprob">predicted class probabilities, indexed [class][dataPoint]</param>
        /// <param name="dataType">the partition to evaluate</param>
        /// <param name="metrics">out: [CLASSERR], [meanTruncNDCG], [pessTruncNDCG], [optiTruncNDCG]</param>
        protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
        {
            //results initialization
            for (int i = 0; i < metrics.Length; i++)
            {
                metrics[i] = 0;
            }

            //(1) compute error rate
            DataSet dataSet = this.labelFeatureCore.DataGroups.GetDataPartition(dataType);
            int[] index = dataSet.DataIndex;
            if (index != null)
            {
                int[] predict = ConvertClassProbToLabels(classprob);
                int err = 0;
                for (int i = 0; i < index.Length; i++)
                {
                    // Non-positive labels predicted as class 0 are not counted as errors.
                    if (labels[index[i]] <= 0 && predict[index[i]] == 0)
                        continue;
                    if (labels[index[i]] != predict[index[i]])
                        err++;
                }
                // float.Epsilon guards against division by zero on empty partitions.
                metrics[(int)NDCGType.CLASSERR] = (float)(err) / (index.Length + float.Epsilon);
            }

            //(2) Compute NDCGs
            int[] groupIndex = dataSet.GroupIndex;
            if (groupIndex != null && groupIndex.Length > 0)
            {
                float[] scores = ConvertProbToScore(classprob);

                double totalMeanNDCG = 0;
                double totalPessNDCG = 0;
                double totalOptiNDCG = 0;
                double cQueries = 0;

                //compute the NDCG for each query/group
                double meanTruncNDCG, pessTruncNDCG, optiTruncNDCG;
                for (int i = 0; i < groupIndex.Length; i++)
                {
                    DataGroup query = this.labelFeatureCore.DataGroups[groupIndex[i]];
                    bool emptyQuery = ndcg.ComputeNDCGs(query, labels, scores, out meanTruncNDCG, out pessTruncNDCG, out optiTruncNDCG);

                    // Count the query unless it is empty and empty queries are dropped.
                    // (Simplified from: !emptyQuery || (emptyQuery && !ndcg.DropEmptyQueries))
                    if (!emptyQuery || !ndcg.DropEmptyQueries)
                    {
                        totalMeanNDCG += meanTruncNDCG;
                        totalPessNDCG += pessTruncNDCG;
                        totalOptiNDCG += optiTruncNDCG;
                        cQueries++;
                    }
                }

                // BUG FIX: guard against division by zero (NaN metrics) when every
                // query was empty and empty queries are dropped; the metrics then
                // keep their initialized value of 0.
                if (cQueries > 0)
                {
                    metrics[(int)NDCGType.meanTruncNDCG] = (float)(totalMeanNDCG / cQueries);
                    metrics[(int)NDCGType.pessTruncNDCG] = (float)(totalPessNDCG / cQueries);
                    metrics[(int)NDCGType.optiTruncNDCG] = (float)(totalOptiNDCG / cQueries);
                }
            }
        }
Пример #17
0
 //dataType==null <=> all the data are used in one partition
 public L_N(LabelFeatureCore labelFeatureCore, DataPartitionType[] dataTypes)
     : base(labelFeatureCore, dataTypes)
 {
 }
Пример #18
0
 /// <summary>
 /// Construct an NDCG/pairwise metric; builds the internal NDCG computer from the
 /// given truncation level and empty-query policy.
 /// </summary>
 /// <param name="ndcgAt">NDCG truncation level passed to the NDCG computer</param>
 /// <param name="dropEmptyQueries">whether empty queries are dropped from the averages</param>
 /// <param name="scoreForEmptyQuery">score assigned to an empty query when it is kept</param>
 public NDCGPairwise(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes,
     int ndcgAt, bool dropEmptyQueries, float scoreForEmptyQuery)
     : base(labelFeatureCore, labelConvert, dataTypes)
 {
     this.ndcg = new NDCG(dropEmptyQueries, scoreForEmptyQuery, ndcgAt);
 }
Пример #19
0
 /// <summary>
 /// Convenience overload: build a Metrics object without a label converter
 /// (raw labels are used as-is).
 /// </summary>
 public Metrics(LabelFeatureCore labelFeatureCore, DataPartitionType[] dataTypes)
     : this(labelFeatureCore, null, dataTypes)
 {
 }
Пример #20
0
 /// <summary>
 /// Convenience overload: retrieve the full (100%, unsampled) data partition.
 /// </summary>
 public DataSet GetDataPartition(DataPartitionType pType)
 {
     return GetDataPartition(pType, 1.0F, null);
 }
Пример #21
0
        /// <summary>
        /// Get the optimal metric value and its corresponding iteration ID.
        /// On training data the PairError column is used; on other partitions meanTruncNDCG.
        /// </summary>
        /// <param name="dataType">in: the data type we are interested in</param>
        /// <param name="result">out: the actual value of the metric</param>
        /// <returns>the iteration Index/ID that produces the optimal metric on dataType,
        /// or -1 when no results have been recorded</returns>
        public override int GetBest(DataPartitionType dataType, ref float result)
        {
            int idMax = -1;
            int index;
            if (results_list.Count > 0)
            {
                if ((int)dataType == 0) //training data
                {
                    index = (int)NDCGPairwiseType.PairError;
                }
                else
                {
                    index = (int)NDCGPairwiseType.meanTruncNDCG;
                }
                result = results_list[0].Metrics[(int)dataType][index];
                idMax = results_list[0].Id;

                for (int i = 0; i < results_list.Count; i++)
                {
                    // NOTE(review): this maximizes the chosen metric; that is right for
                    // NDCG, but confirm it is intended for PairError (an error metric is
                    // usually minimized).
                    if (result < results_list[i].Metrics[(int)dataType][index])
                    {
                        result = results_list[i].Metrics[(int)dataType][index];
                        // BUG FIX: previously stored the loop index 'i'; return the stored
                        // iteration Id, consistent with the initialization above.
                        idMax = results_list[i].Id;
                    }
                }
            }
            return idMax;
        }