/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// </summary>
/// <param name="dataType">in: the data type we are interested in (train/valid/test)</param>
/// <param name="result">out: the actual value of the metric</param>
/// <returns>the iteration Index/ID that produces the minimum of the metric on dataType, or -1 if no results exist</returns>
public override int GetBest(DataPartitionType dataType, ref float result)
{
    int idMin = -1;
    if (results_list.Count > 0)
    {
        result = results_list[0].Metrics[(int)dataType][0];
        idMin = results_list[0].Id;
        // Scan the remaining results for a strictly smaller metric value.
        for (int i = 1; i < results_list.Count; i++)
        {
            if (result > results_list[i].Metrics[(int)dataType][0])
            {
                result = results_list[i].Metrics[(int)dataType][0];
                // BUG FIX: was "idMin = i" (the list index), inconsistent with the
                // initialization above which returns the stored iteration Id.
                idMin = results_list[i].Id;
            }
        }
    }
    return idMin;
}
/// <summary>
/// Compute the metric values for one data partition.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="classprob">per-class probability/score arrays, indexed [class][dataPoint]</param>
/// <param name="dataType">the data partition (train/valid/test) to evaluate</param>
/// <param name="metrics">out: receives the computed metric values; the slot layout is defined by each subclass</param>
protected abstract void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics);
/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// Whether "optimal" means minimum or maximum depends on the concrete metric.
/// </summary>
/// <param name="dataType">in: the data partition we are interested in</param>
/// <param name="result">out: the actual value of the metric</param>
/// <returns>the iteration Index/ID that produces the optimal metric on dataType, or -1 if undefined</returns>
public abstract int GetBest(DataPartitionType dataType, ref float result);
/// <summary>
/// Construct the metrics bookkeeping for the given data: caches (possibly
/// converted) labels for every data point and records the data-point index of
/// every requested partition.
/// </summary>
/// <param name="labelFeatureCore">the labeled data set</param>
/// <param name="labelConvert">optional label converter; null means labels are used as-is</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
public Metrics(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes)
{
    this.labelFeatureCore = labelFeatureCore;
    this.dataTypes = dataTypes;
    this.optimalID = -1;

    // Cache the label of every data point, converting when a converter is given.
    int cDataPoints = labelFeatureCore.NumDataPoint;
    this.labels = new float[cDataPoints];
    for (int i = 0; i < cDataPoints; i++)
    {
        float rawLabel = labelFeatureCore.GetLabel(i);
        this.labels[i] = (labelConvert != null) ? labelConvert.convert(rawLabel) : rawLabel;
    }

    int cTypes = (int)DataPartitionType.cTypes;
    this.dataSegments = new int[cTypes][];
    this.metricsCur = new float[cTypes][];
    for (int i = 0; i < cTypes; i++)
    {
        metricsCur[i] = new float[SIZE];
    }

    // Remember the data index of each requested partition. A partition without
    // a data index is left null here and treated as "all data" downstream.
    foreach (DataPartitionType dataType in dataTypes)
    {
        DataSet dataSet = labelFeatureCore.DataGroups.GetDataPartition(dataType);
        int[] dataSegment = dataSet.DataIndex;
        if (dataSegment != null)
        {
            dataSegments[(int)dataType] = dataSegment;
        }
        //else: we will fill in the non-existing data sections with 0
    }

    this.results_list = new List<Result>();
}
/// <summary>
/// Retrieve the data partition specified before, in actual data index.
/// </summary>
/// <param name="pType">DataPartitionType</param>
/// <param name="subSamplePercent">the percentage of the specified partition to be returned</param>
/// <param name="r">random source for sub-sampling; null means take the leading groups in order</param>
/// <returns>Corresponding DataSet, or null when the partition table is missing or pType is out of range</returns>
public DataSet GetDataPartition(DataPartitionType pType, float subSamplePercent, Random r)
{
    if (groupPartitionIndexTbl == null || pType >= DataPartitionType.cTypes)
    {
        return null;
    }

    int[] groupIndex = this.groupPartitionIndexTbl[(int)pType];
    // Number of groups to keep after sub-sampling (truncated, not rounded).
    int cSampled = (int)((float)groupIndex.Length * subSamplePercent);

    // With a random source we draw a random subset of positions;
    // otherwise we take the prefix 0..cSampled-1.
    int[] sampleGroupIndex = (r != null)
        ? Vector.RandomSample(groupIndex.Length, cSampled, r)
        : Vector.IndexArray(cSampled);

    // Map sampled positions back to actual group indices.
    for (int i = 0; i < cSampled; i++)
    {
        sampleGroupIndex[i] = groupIndex[sampleGroupIndex[i]];
    }
    return new DataSet(this, sampleGroupIndex);
}
//dataType==null <=> all the data are used in one partition
/// <summary>
/// Construct a classification-error metric over the given data partitions.
/// All setup is done by the base Metrics constructor; this class adds no extra state.
/// </summary>
/// <param name="labelFeatureCore">the labeled data set</param>
/// <param name="labelConvert">optional label converter; null means labels are used as-is</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
public ClassError(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes)
    : base(labelFeatureCore, labelConvert, dataTypes)
{
}
/// <summary>
/// Compute precision, recall, and the overall error rate from a 2x2 confusion
/// matrix built over the given data partition.
/// </summary>
/// <param name="labels">per data point binary labels (0/1)</param>
/// <param name="classprob">per-class probabilities; predictions use threshold 0.5</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: [PRECESSION], [RECALL], and slot 2 = error rate</param>
protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
{
    int[] predict = ConvertClassProbToLabels(classprob, 0.5F);
    int[] index = dataSegments[(int)dataType];

    // Reset the confusion matrix: stats[label, prediction].
    for (int i = 0; i < 2; i++)
    {
        for (int j = 0; j < 2; j++)
        {
            stats[i, j] = 0;
        }
    }

    if (index != null)
    {
        for (int i = 0; i < index.Length; i++)
        {
            stats[(int)labels[index[i]], predict[index[i]]]++;
        }
    }
    else
    {
        // BUG FIX: this branch looped over index.Length while index is null,
        // which would always throw; with no explicit partition, score every
        // data point instead.
        for (int i = 0; i < predict.Length; i++)
        {
            stats[(int)labels[i], predict[i]]++;
        }
    }

    metrics[(int)PrecRecallType.PRECESSION] = (float)stats[1, 1] / (float)(stats[0, 1] + stats[1, 1]);
    metrics[(int)PrecRecallType.RECALL] = (float)stats[1, 1] / (float)(stats[1, 0] + stats[1, 1]);
    metrics[2] = (float)(stats[1, 0] + stats[0, 1]) / (float)(stats[0, 0] + stats[0, 1] + stats[1, 0] + stats[1, 1]);
}
/// <summary>
/// Compute the classification error rate on one data partition.
/// A data point with a non-positive label that is predicted as class 0 is
/// treated as correct and skipped.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="classprob">per-class probabilities, indexed [class][dataPoint]</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: metrics[0] receives the error rate</param>
protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
{
    int[] predict = ConvertClassProbToLabels(classprob);
    int[] index = dataSegments[(int)dataType];
    int err = 0;
    double recip = 1.0;
    if (index != null)
    {
        for (int i = 0; i < index.Length; i++)
        {
            if (labels[index[i]] <= 0 && predict[index[i]] == 0)
                continue;
            if (labels[index[i]] != predict[index[i]])
                err++;
        }
        // float.Epsilon guards against division by zero on an empty partition.
        recip = 1.0 / ((double)index.Length + float.Epsilon);
    }
    else
    {
        // No explicit partition: every data point is scored.
        for (int i = 0; i < predict.Length; i++)
        {
            if (labels[i] <= 0 && predict[i] == 0)
                continue;
            if (labels[i] != predict[i])
                err++;
        }
        // CONSISTENCY FIX: cast before adding the epsilon so both branches use
        // the same (double-precision) expression; numerically equivalent.
        recip = 1.0 / ((double)predict.Length + float.Epsilon);
    }
    metrics[0] = (float)(err * recip);
}
//dataType==null <=> all the data are used in one partition
/// <summary>
/// Construct a precision/recall metric over the given data partitions and
/// allocate the 2x2 confusion matrix (stats[label, prediction]) used by ComputeMetrics.
/// </summary>
/// <param name="labelFeatureData">the labeled data set</param>
/// <param name="labelConvert">optional label converter; null means labels are used as-is</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
public PrecRecall(LabelFeatureData labelFeatureData, LabelConverter labelConvert, DataPartitionType[] dataTypes)
    : base(labelFeatureData, labelConvert, dataTypes)
{
    this.stats = new int[2, 2];
}
/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// </summary>
/// <param name="dataType">in: the data type we are interested in</param>
/// <param name="result">out: the actual value of the metric (left unchanged here)</param>
/// <returns>always -1: precision and recall form a trade-off, so no single "best" iteration is defined</returns>
public override int GetBest(DataPartitionType dataType, ref float result)
{
    //int idMin = results_list[results_list.Count-1].Id;
    //we cannot really compare precison/recall
    return -1;
}
/// <summary>
/// Compute the mean absolute (L1) error between labels and predicted scores
/// over the given data partition.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="scorePredict">predicted scores; only scorePredict[0] is used</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: metrics[0] receives the mean absolute error</param>
protected override void ComputeMetrics(float[] labels, float[][] scorePredict, DataPartitionType dataType, float[] metrics)
{
    float[] scores = scorePredict[0];
    int[] segment = dataSegments[(int)dataType];

    double err = 0;
    double recip;
    if (segment == null)
    {
        // No explicit partition: every data point contributes.
        for (int i = 0; i < scores.Length; i++)
        {
            double delta = labels[i] - scores[i];
            err += Math.Abs(delta);
        }
        recip = 1.0 / ((double)scores.Length + float.Epsilon);
    }
    else
    {
        for (int i = 0; i < segment.Length; i++)
        {
            int j = segment[i];
            double delta = labels[j] - scores[j];
            err += Math.Abs(delta);
        }
        // float.Epsilon guards against division by zero on an empty partition.
        recip = 1.0 / ((double)segment.Length + float.Epsilon);
    }
    metrics[0] = (float)(err * recip);
}
/// <summary>
/// Compute pairwise rank metrics for one data partition: the per-query pair
/// error rate and pair cross-entropy, each averaged over queries.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="inScores">predicted scores; only inScores[0] is used</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: filled at PairwiseType.PairCrossEnt and PairwiseType.PairError</param>
protected override void ComputeMetrics(float[] labels, float[][] inScores, DataPartitionType dataType, float[] metrics)
{
    //results initialization
    for (int i = 0; i < metrics.Length; i++)
    {
        metrics[i] = 0;
    }

    //(1) Compute NDCG, pairwise error, and cross-entropy
    DataSet dataSet = this.labelFeatureCore.DataGroups.GetDataPartition(dataType);
    int[] groupIndex = dataSet.GroupIndex;
    if (groupIndex != null && groupIndex.Length > 0)
    {
        float[] scores = inScores[0];
        double cQueries = 0;
        double totalErrRate = 0;
        double totalCrossEnt = 0;
        for (int i = 0; i < groupIndex.Length; i++)
        {
            DataGroup query = this.labelFeatureCore.DataGroups[groupIndex[i]];
            // Enumerate all (higher-label, lower-label) document pairs of this query.
            RankPairGenerator rankPairs = new RankPairGenerator(query, labels);
            double cErr = 0;
            double CrossEnt = 0;
            double cPairs = 0;
            foreach (RankPair rankPair in rankPairs)
            {
                float scoreH_minus_scoreL = scores[rankPair.IdxH] - scores[rankPair.IdxL];
                CrossEnt += RankPair.CrossEntropy(scoreH_minus_scoreL);
                // A pair is an error when the higher-labeled document does not
                // score strictly above the lower-labeled one.
                if (scoreH_minus_scoreL <= 0)
                {
                    cErr++;
                }
                cPairs++;
            }
            if (cPairs > 0) // equivalent to !emptyQuery
            {
                totalErrRate += (cErr / cPairs);
                totalCrossEnt += (CrossEnt / cPairs);
                cQueries++;
            }
            else
            {
                // An empty query contributes a zero metric, but only when empty
                // queries are not dropped from the average.
                if (!ndcg.DropEmptyQueries)
                {
                    totalErrRate += 0.0F;
                    totalCrossEnt += 0.0F;
                    cQueries++;
                }
            }
        }
        // NOTE(review): if every query is empty and DropEmptyQueries is set,
        // cQueries stays 0 and these divisions yield NaN — confirm intended.
        metrics[(int)PairwiseType.PairCrossEnt] = (float)(totalCrossEnt / cQueries);
        metrics[(int)PairwiseType.PairError] = (float)(totalErrRate / cQueries);
    }
}
/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// Pairwise error is an error rate, so "best" is the smallest value.
/// </summary>
/// <param name="dataType">in: the data type we are interested in</param>
/// <param name="result">out: the actual value of the metric</param>
/// <returns>the iteration Index/ID that produces the minimal PairError on dataType, or -1 if no results exist</returns>
public override int GetBest(DataPartitionType dataType, ref float result)
{
    int idBest = -1;
    if (results_list.Count > 0)
    {
        int index = (int)PairwiseType.PairError;
        result = results_list[0].Metrics[(int)dataType][index];
        idBest = results_list[0].Id;
        for (int i = 1; i < results_list.Count; i++)
        {
            // BUG FIX: the comparison was "result < ...", which kept the LARGEST
            // pairwise error; an error rate is best when smallest (and the XML
            // doc explicitly says "minimal").
            if (result > results_list[i].Metrics[(int)dataType][index])
            {
                result = results_list[i].Metrics[(int)dataType][index];
                // BUG FIX: was "i" (list index) instead of the stored iteration Id.
                idBest = results_list[i].Id;
            }
        }
    }
    return idBest;
}
/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// NDCG is a gain, so "best" is the largest mean truncated NDCG.
/// </summary>
/// <param name="dataType">in: the data type we are interested in</param>
/// <param name="result">out: the actual value of the metric</param>
/// <returns>the iteration Index/ID that produces the maximum meanTruncNDCG on dataType, or -1 if no results exist</returns>
public override int GetBest(DataPartitionType dataType, ref float result)
{
    int idMax = -1;
    if (results_list.Count > 0)
    {
        // BUG FIX: the initial value was read from the meanTruncNDCG column but
        // the loop compared against column 0 (CLASSERR); use the same column
        // throughout.
        int index = (int)NDCGType.meanTruncNDCG;
        result = results_list[0].Metrics[(int)dataType][index];
        idMax = results_list[0].Id;
        for (int i = 1; i < results_list.Count; i++)
        {
            if (result < results_list[i].Metrics[(int)dataType][index])
            {
                result = results_list[i].Metrics[(int)dataType][index];
                // BUG FIX: was "i" (list index) instead of the stored iteration Id.
                idMax = results_list[i].Id;
            }
        }
    }
    return idMax;
}
/// <summary>
/// Compute the root-mean-squared error between labels and predicted scores
/// over the given data partition.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="scorePredict">predicted scores; only scorePredict[0] is used</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: metrics[0] receives the RMSE (the only metric produced)</param>
protected override void ComputeMetrics(float[] labels, float[][] scorePredict, DataPartitionType dataType, float[] metrics)
{
    float[] scores = scorePredict[0];
    int[] segment = dataSegments[(int)dataType];

    double sumSq = 0;
    double recip;
    if (segment == null)
    {
        // No explicit partition: every data point contributes.
        for (int i = 0; i < scores.Length; i++)
        {
            double residual = labels[i] - scores[i];
            sumSq += residual * residual;
        }
        recip = 1.0 / ((double)scores.Length + float.Epsilon);
    }
    else
    {
        for (int i = 0; i < segment.Length; i++)
        {
            int j = segment[i];
            double residual = labels[j] - scores[j];
            sumSq += residual * residual;
        }
        // float.Epsilon guards against division by zero on an empty partition.
        recip = 1.0 / ((double)segment.Length + float.Epsilon);
    }
    metrics[0] = (float)Math.Sqrt(sumSq * recip);
}
/// <summary>
/// Compute the metrics for one data partition: the overall classification
/// error rate plus mean/pessimistic/optimistic truncated NDCG averaged over queries.
/// </summary>
/// <param name="labels">per data point labels (already converted)</param>
/// <param name="classprob">per-class probabilities, indexed [class][dataPoint]</param>
/// <param name="dataType">the data partition to evaluate</param>
/// <param name="metrics">out: filled at NDCGType.CLASSERR and the three NDCG slots</param>
protected override void ComputeMetrics(float[] labels, float[][] classprob, DataPartitionType dataType, float[] metrics)
{
    //results initialization
    for (int i = 0; i < metrics.Length; i++)
    {
        metrics[i] = 0;
    }

    //(1) compute error rate
    DataSet dataSet = this.labelFeatureCore.DataGroups.GetDataPartition(dataType);
    int[] index = dataSet.DataIndex;
    if (index != null)
    {
        int[] predict = ConvertClassProbToLabels(classprob);
        int err = 0;
        for (int i = 0; i < index.Length; i++)
        {
            // A non-positive label predicted as class 0 is not counted as an error.
            if (labels[index[i]] <= 0 && predict[index[i]] == 0)
                continue;
            if (labels[index[i]] != predict[index[i]])
                err++;
        }
        // float.Epsilon guards against division by zero on an empty partition.
        metrics[(int)NDCGType.CLASSERR] = (float)(err) / (index.Length + float.Epsilon);
    }

    //(2) Compute NDCGs
    int[] groupIndex = dataSet.GroupIndex;
    if (groupIndex != null && groupIndex.Length > 0)
    {
        float[] scores = ConvertProbToScore(classprob);
        double totalMeanNDCG = 0;
        double totalPessNDCG = 0;
        double totalOptiNDCG = 0;
        double cQueries = 0;
        //compute the NDCG for each query/group
        double meanTruncNDCG, pessTruncNDCG, optiTruncNDCG;
        for (int i = 0; i < groupIndex.Length; i++)
        {
            DataGroup query = this.labelFeatureCore.DataGroups[groupIndex[i]];
            //float ndcg = this.ndcg.ComputeNDCG(query, labels, scores, this.ndcgAt);
            bool emptyQuery = ndcg.ComputeNDCGs(query, labels, scores, out meanTruncNDCG, out pessTruncNDCG, out optiTruncNDCG);
            // Empty queries contribute only when they are not dropped from the average.
            if (!emptyQuery || (emptyQuery && !ndcg.DropEmptyQueries))
            {
                totalMeanNDCG += meanTruncNDCG;
                totalPessNDCG += pessTruncNDCG;
                totalOptiNDCG += optiTruncNDCG;
                cQueries++;
            }
        }
        // NOTE(review): if every query is empty and DropEmptyQueries is set,
        // cQueries stays 0 and these divisions yield NaN — confirm intended.
        metrics[(int)NDCGType.meanTruncNDCG] = (float)(totalMeanNDCG / cQueries);
        metrics[(int)NDCGType.pessTruncNDCG] = (float)(totalPessNDCG / cQueries);
        metrics[(int)NDCGType.optiTruncNDCG] = (float)(totalOptiNDCG / cQueries);
    }
}
//dataType==null <=> all the data are used in one partition
/// <summary>
/// Construct a regression-error metric over the given data partitions; all
/// setup is done by the base Metrics constructor (no label conversion).
/// </summary>
/// <param name="labelFeatureCore">the labeled data set</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
public L_N(LabelFeatureCore labelFeatureCore, DataPartitionType[] dataTypes)
    : base(labelFeatureCore, dataTypes)
{
}
/// <summary>
/// Construct a metric that tracks both truncated NDCG and pairwise error.
/// </summary>
/// <param name="labelFeatureCore">the labeled data set</param>
/// <param name="labelConvert">optional label converter; null means labels are used as-is</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
/// <param name="ndcgAt">the truncation level passed to the NDCG computer</param>
/// <param name="dropEmptyQueries">whether empty queries are excluded from the averages</param>
/// <param name="scoreForEmptyQuery">value forwarded to NDCG for empty queries — presumably the NDCG assigned when they are kept; see NDCG</param>
public NDCGPairwise(LabelFeatureCore labelFeatureCore, LabelConverter labelConvert, DataPartitionType[] dataTypes, int ndcgAt, bool dropEmptyQueries, float scoreForEmptyQuery)
    : base(labelFeatureCore, labelConvert, dataTypes)
{
    this.ndcg = new NDCG(dropEmptyQueries, scoreForEmptyQuery, ndcgAt);
}
/// <summary>
/// Convenience constructor: no label conversion (labels are used as-is).
/// </summary>
/// <param name="labelFeatureCore">the labeled data set</param>
/// <param name="dataTypes">the data partitions (train/valid/test) to track</param>
public Metrics(LabelFeatureCore labelFeatureCore, DataPartitionType[] dataTypes)
    : this(labelFeatureCore, null, dataTypes)
{
}
/// <summary>
/// Retrieve the full data partition (no sub-sampling, deterministic order).
/// </summary>
/// <param name="pType">DataPartitionType</param>
/// <returns>the complete DataSet for the partition, or null if it does not exist</returns>
public DataSet GetDataPartition(DataPartitionType pType)
{
    // 100% of the partition, and a null Random means "no shuffling".
    const float entirePartition = 1.0F;
    return GetDataPartition(pType, entirePartition, null);
}
/// <summary>
/// Get the optimal metric value and its corresponding iteration ID.
/// For the training partition the tracked metric is pairwise error (an error
/// rate, minimized); for all other partitions it is mean truncated NDCG
/// (a gain, maximized).
/// </summary>
/// <param name="dataType">in: the data type we are interested in</param>
/// <param name="result">out: the actual value of the metric</param>
/// <returns>the iteration Index/ID that produces the optimal metric on dataType, or -1 if no results exist</returns>
public override int GetBest(DataPartitionType dataType, ref float result)
{
    int idBest = -1;
    if (results_list.Count > 0)
    {
        int index;
        bool minimize;
        if ((int)dataType == 0) //training data
        {
            index = (int)NDCGPairwiseType.PairError;
            minimize = true; // error rate: smaller is better
        }
        else
        {
            index = (int)NDCGPairwiseType.meanTruncNDCG;
            minimize = false; // NDCG: larger is better
        }
        result = results_list[0].Metrics[(int)dataType][index];
        idBest = results_list[0].Id;
        for (int i = 1; i < results_list.Count; i++)
        {
            float value = results_list[i].Metrics[(int)dataType][index];
            // BUG FIX: the original always kept the LARGEST value, which is
            // wrong for PairError (an error rate), and returned the list index
            // "i" instead of the stored iteration Id.
            bool better = minimize ? (value < result) : (value > result);
            if (better)
            {
                result = value;
                idBest = results_list[i].Id;
            }
        }
    }
    return idBest;
}