C# (CSharp) BoostTreeLoss.ModelScoresToFuncValues Examples

Programming Language: C# (CSharp)

Class/Type: BoostTreeLoss

Method/Function: ModelScoresToFuncValues

Examples at hotexamples.com: 3

C# (CSharp) BoostTreeLoss.ModelScoresToFuncValues - 3 examples found. These are the top-rated real-world C# (CSharp) examples of BoostTreeLoss.ModelScoresToFuncValues extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

AccFuncValueGain(3)

FuncValuesToModelScores(3)

ModelEval(3)

ModelScoresToFuncValues(3)

ComputePseudoResponse(2)

ComputeResponseAdjust(2)

TrimIndex(2)

PseudoResponse(1)

Reset(1)

Response(1)

Example #1

Show file

File: BoostTree.cs Project: zbxzc35/BoostTree

        /// <summary>
        /// This method implements the main functionality of stochastic gradient boosting.
        /// Starting from the scores of the sub-model, every iteration draws a partition of the
        /// training data, computes the pseudo response, fits
        /// <c>boostTreeLoss.NumTreesPerIteration</c> regression trees, accumulates their function
        /// values into <paramref name="boostTreeLoss"/> and recomputes the metrics.
        /// The split finder is cleaned up even when an iteration throws.
        /// </summary>
        /// <param name="metrics">Receives the model scores for the sub-model and after every iteration,
        /// and is queried for the best iteration on the validation partition.</param>
        /// <param name="boostTreeLoss">Loss object that holds the function values / model scores and
        /// supplies the pseudo response, trim index and response adjustment.</param>
        /// <param name="dataFeatureSampleRate">Supplies the per-iteration sampling rates for data groups, data and features.</param>
        /// <param name="maxTreeSize">Forwarded unchanged to the <c>RegressionTree</c> constructor.</param>
        /// <param name="minNumSamples">Forwarded unchanged to the <c>RegressionTree</c> constructor.</param>
        /// <param name="numIter">Number of boosting iterations to run.</param>
        /// <param name="cThreads">Passed to <c>FindSplitAsync</c> when SINGLE_THREAD is not defined.</param>
        /// <param name="r">Random source used by the samplers and by the data partitioning.</param>
        private void BuildBoostTree(Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate,
                                    int maxTreeSize, int minNumSamples, int numIter,
                                    int cThreads, Random r)
        {
            // Running best validation error, updated by metrics.GetBest through the ref argument.
            // NOTE(review): 100 is presumably an upper bound on the error metric - confirm.
            float minValidationErr = 100;

            float[] funValueGain = new float[this.numSamples];

            //(1) compute scores produced by the sub-model
            boostTreeLoss.ModelEval(this.subModel, this.labelFeatureDataCoded, this.subModelScore);

            //(2) compute the corresponding function values;
            boostTreeLoss.ModelScoresToFuncValues();

            //(3) compute the metrics of the sub-model (reported as iteration 0)
            this.optIter = 0;
            int m = 0;
            metrics.ComputeMetrics(boostTreeLoss.ModelScores, m, false);

#if VERBOSE
            Console.WriteLine(metrics.ResultsHeaderStr());
            Console.WriteLine(metrics.ResultsStr(m));
#endif
            //(4) create samplers to sub-sample the features and data during node splitting
            RandomSampler featureSampler = new RandomSampler(r);
            RandomSampler dataSampler = new RandomSampler(r);

            //(5) create the object that does node splitting
#if SINGLE_THREAD
            // single-threaded
            this.findSplit = new FindSplitSync();
#else
            // multi-threaded
            this.findSplit = new FindSplitAsync(cThreads);
#endif //SINGLE_THREAD

            // From here on the split finder exists, so the loop is wrapped in try/finally:
            // a failure in any iteration must not skip its Cleanup().
            try
            {
                //(6) Iteratively building boosted trees
                for (m = 0; m < numIter; m++)
                {
                    // selecting a fraction of data groups for each iteration
                    float sampleRate = dataFeatureSampleRate.SampleDataGroupRate(m);
                    DataSet workDataSet = this.labelFeatureDataCoded.DataGroups.GetDataPartition(DataPartitionType.Train, sampleRate, r);
                    workDataSet.Sort();  // sorting gains some noticeable speedup.

                    // compute the pseudo response of the current system
                    boostTreeLoss.ComputePseudoResponse(workDataSet);

                    // set the data and feature sampling rate for node splitting in this iteration
                    featureSampler.SampleRate = dataFeatureSampleRate.SampleFeatureRate(m);
                    dataSampler.SampleRate = dataFeatureSampleRate.SampleDataRate(m);

                    // fit a residual model (regression trees) from the pseudo response
                    // to compensate the error of the current system
                    for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
                    {
                        // only use the important data points if necessary
                        int[] trimIndex = boostTreeLoss.TrimIndex(workDataSet, k, m);

                        // build a regression tree according to the pseudo response
                        this.regressionTrees[m, k] = new RegressionTree(this.labelFeatureDataCoded, boostTreeLoss, k, trimIndex,
                                                                        dataSampler, featureSampler, maxTreeSize, minNumSamples, this.findSplit, this.tempSpace);

                        // compute the function value of all data points produced by the newly generated regression tree
                        this.regressionTrees[m, k].PredictFunValue(this.labelFeatureDataCoded, ref funValueGain);

                        // try to do a more global optimization - refine the leaf node response of a decision tree
                        // by looking at all the training data points, instead of only the ones falling into the region.
                        // Here we estimate and apply a global multiplication factor for all leaf nodes.
                        // The first iteration (m == 0) is left unscaled.
                        float adjFactor = (m > 0) ? boostTreeLoss.ComputeResponseAdjust(funValueGain) : 1.0F;

                        // apply the multiplication factor to the leaf nodes of the newly generated regression tree
                        this.regressionTrees[m, k].AdjustResponse(adjFactor);

                        // update the function value for all data points given the new regression tree
                        boostTreeLoss.AccFuncValueGain(funValueGain, adjFactor, k);
                    }

                    // compute the metrics of the current system (reported as iteration m + 1)
                    boostTreeLoss.FuncValuesToModelScores();
                    metrics.ComputeMetrics(boostTreeLoss.ModelScores, m + 1, false);
#if VERBOSE
                    Console.WriteLine(metrics.ResultsStr(m + 1));
#endif
                    // keep track of the best (minimal error) iteration on the validation data set
                    this.optIter = metrics.GetBest(DataPartitionType.Validation, ref minValidationErr);

                    if ((m + 1) % 5 == 0)  // save the tree every 5 iterations
                    {
                        SaveBoostTree();
                    }
                }
            }
            finally
            {
                if (this.findSplit != null)
                {
                    this.findSplit.Cleanup();
                }
            }
        }

Example #2

Show file

File: BoostTree.cs Project: zbxzc35/BoostTree

        /// <summary>
        /// Scores <paramref name="labelFeatureData"/> with the sub-model followed by the first
        /// <paramref name="numIter"/> iterations of regression trees, accumulating the result in
        /// <paramref name="boostTreeLoss"/>.
        /// </summary>
        /// <param name="labelFeatureData">Data set to score.</param>
        /// <param name="numIter">Number of iterations to apply; capped at <c>TotalIter</c>.</param>
        /// <param name="boostTreeLoss">Loss object that is reset to the size of the data set and then
        /// receives the function values and model scores.</param>
        /// <param name="metrics">When not null, the error is reported for the sub-model and for each
        /// iteration, and the final scores are saved to "DataScores.txt". When null, only the final
        /// model scores are computed.</param>
        /// <param name="silent">If true, only report results on the last iteration.</param>
        public void Predict(LabelFeatureData labelFeatureData, int numIter,
                            BoostTreeLoss boostTreeLoss,
                            Metrics metrics,
                            bool silent)
        {
            // never apply more iterations than the model holds
            numIter = (numIter > this.TotalIter) ? this.TotalIter : numIter;

            boostTreeLoss.Reset(labelFeatureData.NumDataPoint);

            //(1) scores of the sub-model ...
            boostTreeLoss.ModelEval(this.subModel, labelFeatureData, null);

            //(2) ... converted to the corresponding function values
            boostTreeLoss.ModelScoresToFuncValues();

            if (metrics != null)
            {
                // the sub-model alone is reported as iteration 0
                metrics.ComputeMetrics(boostTreeLoss.ModelScores, 0, this.optIter == 0);
#if VERBOSE
                Console.WriteLine(metrics.ResultsHeaderStr());
                Console.WriteLine(metrics.ResultsStr(0));
#endif
            }

            //(3) accumulate the function values for each boosted regression tree
            int sampleCount = labelFeatureData.NumDataPoint;
            float[] gainBuffer = new float[sampleCount];

#if GET_PER_DOC_PER_ITER_SCORES
            // rows 0 and 1 hold group id and label, rows 2.. hold one iteration each;
            // the matrix is transposed when it is printed
            float[][] perIterScores = ArrayUtils.FloatMatrix(numIter+2, labelFeatureData.NumDataPoint);
            for (int doc = 0; doc < labelFeatureData.NumDataPoint; ++doc)
            {
                perIterScores[0][doc] = labelFeatureData.GetGroupId(doc);
                perIterScores[1][doc] = labelFeatureData.GetLabel(doc);
            }
#endif

            for (int iter = 0; iter < numIter; iter++)
            {
                // apply every regression tree of this iteration and add its function values
                for (int treeIdx = 0; treeIdx < boostTreeLoss.NumTreesPerIteration; treeIdx++)
                {
                    // an iteration whose first tree is missing contributes nothing
                    if (this.regressionTrees[iter, 0] == null)
                    {
                        break;
                    }
#if GET_PER_DOC_PER_ITER_SCORES
                    this.regressionTrees[iter, treeIdx].PredictFunValueNKeepScores(labelFeatureData, this.Train2TestIdx, gainBuffer, perIterScores[iter+2]);
#else
                    this.regressionTrees[iter, treeIdx].PredictFunValue(labelFeatureData, this.Train2TestIdx, gainBuffer);
#endif
                    boostTreeLoss.AccFuncValueGain(gainBuffer, 1.0f, treeIdx);
                }

                if (metrics == null)
                {
                    continue;
                }

                // compute the metrics of the current system
                int completed = iter + 1;
                boostTreeLoss.FuncValuesToModelScores();
                metrics.ComputeMetrics(boostTreeLoss.ModelScores, completed, this.optIter == completed);

                bool isLastIteration = completed == numIter;
                if (isLastIteration || !silent)
                {
                    Console.WriteLine(metrics.ResultsStr(completed));
                }
            }

#if GET_PER_DOC_PER_ITER_SCORES
            using (StreamWriter writer = new StreamWriter("allScores.tsv"))
            {
                // header line (with no tab at the end!)
                writer.Write("m:QueryID\tm:Rating");
                for (int col = 1; col < numIter+1; ++col)
                {
                    writer.Write("\tFtr_" + col.ToString("0000"));
                }
                writer.WriteLine();

                for (int doc = 0; doc < labelFeatureData.NumDataPoint; ++doc)
                {
                    // query ID and label first, then one column per iteration
                    writer.Write("{0}\t{1}", perIterScores[0][doc], perIterScores[1][doc]);
                    for (int row = 2; row < numIter + 2; ++row)
                    {
                        writer.Write("\t{0:G6}", perIterScores[row][doc]);
                    }
                    writer.WriteLine();
                }
            }
#endif

            if (metrics != null)
            {
                metrics.SaveScores("DataScores.txt", boostTreeLoss.ModelScores);
            }
            else
            {
                // without metrics the scores were never refreshed inside the loop
                boostTreeLoss.FuncValuesToModelScores();
            }
        }

Example #3

Show file

File: BoostTree.cs Project: zbxzc35/BoostTree

        /// <summary>
        /// This method implements the main functionality of stochastic gradient boosting, for distributed computing.
        /// Starting from the scores of the sub-model, every iteration obtains candidate regression trees
        /// from <c>GetNextWeakLearner</c>, hands them to <c>AddWeakLearner</c> and recomputes the metrics.
        /// The split finder is cleaned up even when an iteration throws.
        /// </summary>
        /// <param name="metrics">Receives the model scores for the sub-model and after every iteration,
        /// and is queried for the best iteration on the validation partition.</param>
        /// <param name="boostTreeLoss">Loss object that holds the function values and model scores.</param>
        /// <param name="dataFeatureSampleRate">Forwarded to <c>GetNextWeakLearner</c> and <c>AddWeakLearner</c>.</param>
        /// <param name="maxTreeSize">Forwarded to <c>GetNextWeakLearner</c> and <c>AddWeakLearner</c>.</param>
        /// <param name="minNumSamples">Forwarded to <c>GetNextWeakLearner</c> and <c>AddWeakLearner</c>.</param>
        /// <param name="numIter">Number of boosting iterations to run.</param>
        /// <param name="cThreads">Passed to <c>FindSplitAsync</c> when SINGLE_THREAD is not defined, and to the weak-learner helpers.</param>
        /// <param name="r">Random source used by the samplers and forwarded to the weak-learner helpers.</param>
        private void DistributedBuildBoostTree(Metrics metrics, BoostTreeLoss boostTreeLoss, DataFeatureSampleRate dataFeatureSampleRate,
                                    int maxTreeSize, int minNumSamples, int numIter,
                                    int cThreads, Random r)
        {
            // Running best validation error, updated by metrics.GetBest through the ref argument.
            // NOTE(review): 100 is presumably an upper bound on the error metric - confirm.
            float minValidationErr = 100;

            float[] funValueGain = new float[this.numSamples];

            //(1) compute scores produced by the sub-model
            boostTreeLoss.ModelEval(this.subModel, this.labelFeatureDataCoded, this.subModelScore);

            //(2) compute the corresponding function values;
            boostTreeLoss.ModelScoresToFuncValues();

            //(3) compute the metrics of the sub-model (reported as iteration 0)
            this.optIter = 0;
            int m = 0;
            metrics.ComputeMetrics(boostTreeLoss.ModelScores, m, false);
#if VERBOSE
            Console.WriteLine(metrics.ResultsHeaderStr());
            Console.WriteLine(metrics.ResultsStr(m));
#endif
            //(4) create samplers to sub-sample the features and data during node splitting
            RandomSampler featureSampler = new RandomSampler(r);
            RandomSampler dataSampler = new RandomSampler(r);

            //(5) create the object that does node splitting
#if SINGLE_THREAD
            // single-threaded
            this.findSplit = new FindSplitSync();
#else
            // multi-threaded
            this.findSplit = new FindSplitAsync(cThreads);
#endif //SINGLE_THREAD

            // From here on the split finder exists, so the loop is wrapped in try/finally:
            // a failure in any iteration must not skip its Cleanup().
            try
            {
                //(6) Iteratively building boosted trees
                for (m = 0; m < numIter; m++)
                {
                    // returns array of regression trees (one per class k) for this iteration
                    RegressionTree[] candidateTree = GetNextWeakLearner(m, funValueGain, metrics, boostTreeLoss, dataFeatureSampleRate, dataSampler, featureSampler, maxTreeSize, minNumSamples, cThreads, r);

                    AddWeakLearner(candidateTree, funValueGain, m, metrics, boostTreeLoss, dataFeatureSampleRate, maxTreeSize, minNumSamples, cThreads, r);

                    // compute the metrics of the current system (reported as iteration m + 1)
                    boostTreeLoss.FuncValuesToModelScores();
                    metrics.ComputeMetrics(boostTreeLoss.ModelScores, m + 1, false);
#if VERBOSE
                    Console.WriteLine(metrics.ResultsStr(m + 1));
#endif
                    // keep track of the best (minimal error) iteration on the validation data set
                    this.optIter = metrics.GetBest(DataPartitionType.Validation, ref minValidationErr);

                    if ((m + 1) % 5 == 0)  // save the tree every 5 iterations
                    {
                        SaveBoostTree();
                    }
                }
            }
            finally
            {
                if (this.findSplit != null)
                {
                    this.findSplit.Cleanup();
                }
            }
        }