public void AddWeakLearner(RegressionTree[] candidateTree, float[] funValueGain, int m, Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate, int maxTreeSize, int minNumSamples, int cThreads, Random r)
{
    // Commit the candidate trees for boosting iteration m: store each tree as this
    // iteration's t-th weak learner and fold its (adjusted) predicted function-value
    // gain into the loss accumulator so the model scores reflect the new trees.
    for (int t = 0; t < boostTreeLoss.NumTreesPerIteration; t++)
    {
        RegressionTree tree = candidateTree[t];

        // Predict the per-sample function-value gain of this tree over the coded data.
        tree.PredictFunValue(this.labelFeatureDataCoded, true, ref funValueGain);

        // Record the tree in the (iteration, component) slot of the ensemble.
        this.regressionTrees[m, t] = tree;

        // Accumulate the gain, scaled by the tree's global leaf-adjustment factor.
        boostTreeLoss.AccFuncValueGain(funValueGain, tree.AdjustFactor, t);
    }
}
public double EvaluateWeakLearner(RegressionTree[] candidateTree, float[] funValueGain, Metrics metrics, BoostTreeLoss boostTreeLoss, int id)
{
    // Evaluate the candidate trees by provisionally adding their (adjusted) predictions
    // to the current model scores and computing the metric on the training partition.
    // The model itself is not mutated; only the candidate scores are scored.
    // Returns the training-set NDCG-pairwise value (see index constant below).
    int cSamples = funValueGain.Length;

    // Newly allocated float[] elements are zero-initialized by the CLR,
    // so no explicit clearing loop is needed.
    float[][] scores = new float[boostTreeLoss.NumTreesPerIteration][];
    for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
    {
        scores[k] = new float[cSamples];
    }

    for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
    {
        candidateTree[k].PredictFunValue(this.labelFeatureDataCoded, true, ref funValueGain);

        //we hard code here that k=0 (not performing classification)
        //kms: this is a bit of hack...only will really work for non-classification currently
        // upgrade to have a per loss function evaluation
        for (int i = 0; i < cSamples; i++)
        {
            scores[k][i] = boostTreeLoss.ModelScores[k][i] + (funValueGain[i] * candidateTree[k].AdjustFactor);
        }
    }

    //need to update id so we have unique id. For now, we take M + m + 1;
    //assume only want NDCGPairwise for now
    metrics.ComputeMetrics(scores, id, false);

    // Index 2 selects the NDCG-pairwise column of the results matrix (NDCGPairwiseType = 2).
    const int ndcgPairwiseIndex = 2;
    return metrics.ResultsStrMatrix(id)[(int)DataPartitionType.Train][ndcgPairwiseIndex];
}
public RegressionTree[] GetNextWeakLearner(int m, float[] funValueGain, Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate, RandomSampler dataSampler, RandomSampler featureSampler, int maxTreeSize, int minNumSamples, int cThreads, Random r)
{
    // Build the regression tree(s) for boosting iteration m: sample a subset of the
    // training data groups, compute the pseudo-response of the current model on it,
    // fit one tree per loss component, and globally rescale each tree's leaf responses.

    // Draw this iteration's fraction of training data groups.
    float groupRate = dataFeatureSampleRate.SampleDataGroupRate(m);
    DataSet sampledTrainSet = this.labelFeatureDataCoded.DataGroups.GetDataPartition(DataPartitionType.Train, groupRate, r);
    sampledTrainSet.Sort(); // sorting gains some noticeable speedup

    // Pseudo-response of the current system on the sampled data.
    boostTreeLoss.ComputePseudoResponse(sampledTrainSet);

    // Per-iteration data/feature sampling rates used during node splitting.
    featureSampler.SampleRate = dataFeatureSampleRate.SampleFeatureRate(m);
    dataSampler.SampleRate = dataFeatureSampleRate.SampleDataRate(m);

    // Fit a residual model (one regression tree per loss component) to
    // compensate for the error of the current system.
    RegressionTree[] iterationTrees = new RegressionTree[boostTreeLoss.NumTreesPerIteration];
    for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
    {
        // Optionally restrict fitting to the important data points only.
        int[] keptIndices = boostTreeLoss.TrimIndex(sampledTrainSet, k, m);

        // Grow a regression tree according to the pseudo-response.
        iterationTrees[k] = new RegressionTree(this.labelFeatureDataCoded, boostTreeLoss, k, keptIndices, dataSampler, featureSampler, maxTreeSize, minNumSamples, this.findSplit, this.tempSpace);

        // Predict the function value of every data point under the new tree.
        iterationTrees[k].PredictFunValue(this.labelFeatureDataCoded, ref funValueGain);

        // More global optimization: refine the leaf-node responses by estimating a
        // single multiplicative factor over all training points, instead of only
        // the points falling into each region. Skipped on the very first iteration.
        float leafScale;
        if (m > 0)
        {
            leafScale = boostTreeLoss.ComputeResponseAdjust(funValueGain);
        }
        else
        {
            leafScale = 1.0F;
        }

        // Apply the multiplicative factor to the leaf nodes of the new tree.
        iterationTrees[k].AdjustResponse(leafScale);
        iterationTrees[k].AdjustFactor = leafScale;
    }

    // Return the k regression trees for this iteration.
    return iterationTrees;
}