// Example 1
        /// <summary>
        /// Commits the candidate regression trees for boosting iteration m: stores each
        /// tree in the model's tree matrix and accumulates its (adjust-factor-scaled)
        /// predicted function values into the loss state.
        /// </summary>
        /// <param name="candidateTree">One candidate tree per loss slot (boostTreeLoss.NumTreesPerIteration entries).</param>
        /// <param name="funValueGain">Scratch buffer; overwritten with each tree's predicted function-value gains.</param>
        /// <param name="m">Index of the current boosting iteration.</param>
        /// <param name="metrics">Unused here; kept for signature compatibility with related methods.</param>
        /// <param name="boostTreeLoss">Loss object that receives the accumulated function-value gains.</param>
        /// <param name="dataFeatureSampleRate">Unused here; kept for signature compatibility.</param>
        /// <param name="maxTreeSize">Unused here; kept for signature compatibility.</param>
        /// <param name="minNumSamples">Unused here; kept for signature compatibility.</param>
        /// <param name="cThreads">Unused here; kept for signature compatibility.</param>
        /// <param name="r">Unused here; kept for signature compatibility.</param>
        public void AddWeakLearner(RegressionTree[] candidateTree, float[] funValueGain, int m, Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate, int maxTreeSize, int minNumSamples, int cThreads, Random r)
        {
            int treeCount = boostTreeLoss.NumTreesPerIteration;

            for (int k = 0; k < treeCount; k++)
            {
                RegressionTree tree = candidateTree[k];

                // Overwrites funValueGain with this tree's predicted gains for all data points.
                tree.PredictFunValue(this.labelFeatureDataCoded, true, ref funValueGain);

                // Record the tree and fold its scaled gains into the running function values.
                this.regressionTrees[m, k] = tree;
                boostTreeLoss.AccFuncValueGain(funValueGain, tree.AdjustFactor, k);
            }
        }
// Example 2
        /// <summary>
        /// Evaluates a set of candidate regression trees by adding their adjusted
        /// predictions to the current model scores and computing the resulting metric
        /// on the training partition.
        /// </summary>
        /// <param name="candidateTree">One candidate tree per loss slot (boostTreeLoss.NumTreesPerIteration entries).</param>
        /// <param name="funValueGain">Scratch buffer; overwritten with each tree's predicted function-value gains.</param>
        /// <param name="metrics">Metrics object used to score the combined model.</param>
        /// <param name="boostTreeLoss">Loss object providing the current per-slot model scores.</param>
        /// <param name="id">Id under which the metric results are stored; must be unique per evaluation.</param>
        /// <returns>The training-partition metric at result index 2 (NDCGPairwise is assumed).</returns>
        public double EvaluateWeakLearner(RegressionTree[] candidateTree, float[] funValueGain, Metrics metrics, BoostTreeLoss boostTreeLoss, int id)
        {
            int numTrees = boostTreeLoss.NumTreesPerIteration;
            int numSamples = funValueGain.Length;

            // Note: newly allocated float[] arrays are zero-filled by the CLR, so no
            // explicit zero-initialization pass is needed.
            float[][] scores = new float[numTrees][];

            for (int k = 0; k < numTrees; k++)
            {
                scores[k] = new float[numSamples];

                // Overwrites funValueGain with this tree's predicted gains for all data points.
                candidateTree[k].PredictFunValue(this.labelFeatureDataCoded, true, ref funValueGain);

                //we hard code here that k=0 (not performing classification)
                //kms: this is a bit of hack...only will really work for non-classification currently
                // upgrade to have a per loss function evaluation
                for (int i = 0; i < numSamples; i++)
                {
                    scores[k][i] = boostTreeLoss.ModelScores[k][i] + (funValueGain[i] * candidateTree[k].AdjustFactor);
                }
            }

            //need to update id so we have unique id.  For now, we take M + m + 1;
            //assume only want NDCGPairwise for now
            metrics.ComputeMetrics(scores, id, false);

            // Result index 2 corresponds to NDCGPairwise (NDCGPairwiseType = 2).
            return metrics.ResultsStrMatrix(id)[(int)DataPartitionType.Train][2];
        }
// Example 3
        /// <summary>
        /// Builds the regression trees for boosting iteration m: samples a subset of
        /// the training data groups, computes the loss's pseudo-response on it, fits
        /// one tree per loss slot, and applies a globally estimated multiplicative
        /// adjustment to each tree's leaf responses.
        /// </summary>
        /// <param name="m">Index of the current boosting iteration.</param>
        /// <param name="funValueGain">Scratch buffer; overwritten with each new tree's predicted function values.</param>
        /// <param name="metrics">Unused here; kept for signature compatibility with related methods.</param>
        /// <param name="boostTreeLoss">Loss object that supplies pseudo-responses, trimming, and response adjustment.</param>
        /// <param name="dataFeatureSampleRate">Schedules the per-iteration data/feature sampling rates.</param>
        /// <param name="dataSampler">Sampler for data points during node splitting.</param>
        /// <param name="featureSampler">Sampler for features during node splitting.</param>
        /// <param name="maxTreeSize">Maximum number of leaf nodes per tree.</param>
        /// <param name="minNumSamples">Minimum number of samples required to split a node.</param>
        /// <param name="cThreads">Unused here; kept for signature compatibility.</param>
        /// <param name="r">Randomness source for the data-group partition.</param>
        /// <returns>The fitted trees, one per boostTreeLoss.NumTreesPerIteration slot.</returns>
        public RegressionTree[] GetNextWeakLearner(int m, float[] funValueGain, Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate, RandomSampler dataSampler, RandomSampler featureSampler,
                                    int maxTreeSize, int minNumSamples, int cThreads, Random r)
        {
            // Draw a fraction of the data groups to work on this iteration.
            float groupRate = dataFeatureSampleRate.SampleDataGroupRate(m);
            DataSet workSet = this.labelFeatureDataCoded.DataGroups.GetDataPartition(DataPartitionType.Train, groupRate, r);

            // Sorting the working set yields a noticeable speedup downstream.
            workSet.Sort();

            // Pseudo-response of the current system on the sampled data.
            boostTreeLoss.ComputePseudoResponse(workSet);

            // Per-iteration data/feature sampling rates used during node splitting.
            featureSampler.SampleRate = dataFeatureSampleRate.SampleFeatureRate(m);
            dataSampler.SampleRate = dataFeatureSampleRate.SampleDataRate(m);

            // Fit residual trees that compensate the error of the current system.
            RegressionTree[] fittedTrees = new RegressionTree[boostTreeLoss.NumTreesPerIteration];

            for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
            {
                // Restrict to the important data points when the loss requests trimming.
                int[] activeIndex = boostTreeLoss.TrimIndex(workSet, k, m);

                // Grow a regression tree on the pseudo-response.
                fittedTrees[k] = new RegressionTree(this.labelFeatureDataCoded, boostTreeLoss, k, activeIndex,
                                                                dataSampler, featureSampler, maxTreeSize, minNumSamples, this.findSplit, this.tempSpace);

                // Predicted function values of every data point under the new tree.
                fittedTrees[k].PredictFunValue(this.labelFeatureDataCoded, ref funValueGain);

                // Global refinement: estimate one multiplicative factor from all training
                // points (not just those falling into each leaf's region) and apply it to
                // the leaf responses. Skipped (factor 1.0) on the very first iteration.
                float responseFactor = (m > 0) ? boostTreeLoss.ComputeResponseAdjust(funValueGain) : 1.0F;

                fittedTrees[k].AdjustResponse(responseFactor);
                fittedTrees[k].AdjustFactor = responseFactor;
            }

            // One tree per loss slot.
            return fittedTrees;
        }