Example #1
        // The data are preprocessed (quantized) to avoid sorting
        public BoostTree(LabelFeatureDataCoded labelFeatureDataCoded, LabelFeatureData subModelScore,
                        Model subModel, BoostTreeLoss boostTreeLoss,
                        string saveTreeBinFile, string saveTreeTextFile)
        {
            this.labelFeatureDataCoded = labelFeatureDataCoded;
            this.subModelScore = subModelScore;

            UnpackData();

            this.subModel = subModel;
            this.boostTreeLoss = boostTreeLoss;
            this.featureNames = labelFeatureDataCoded.FeatureNames;

            this.saveTreeBinFile = saveTreeBinFile;
            this.saveTreeTextFile = saveTreeTextFile;
            this.saveTreeXmlFile = saveTreeTextFile + ".xml";
        }
Example #2
        //the first one is always the training data, which is always coded
        public static CLabelFeatureDataCodedComposite Create(LabelFeatureDataCoded trainLabelFeatureDataCoded, LabelFeatureData validLabelFeatureData, LabelFeatureData testLabelFeatureData)         
        {
            if(trainLabelFeatureDataCoded == null)
            {
                return null;
            }

            int cPartition = (int)DataPartitionType.cTypes;

            List<LabelFeatureData> listLabelFeatureData = new List<LabelFeatureData>(cPartition);                        
            listLabelFeatureData.Add(trainLabelFeatureDataCoded);            
            listLabelFeatureData.Add(validLabelFeatureData);       
            listLabelFeatureData.Add(testLabelFeatureData);            
            LabelFeatureData[] labelFeatureDataArray = listLabelFeatureData.ToArray();

            CLabelFeatureDataCodedComposite labelFeatureData = new CLabelFeatureDataCodedComposite(labelFeatureDataArray);
           
            int[] cDataGroups = new int[cPartition];
            for (int i = 0; i < cPartition; i++)
            {
                cDataGroups[i] = 0;
                if (labelFeatureDataArray[i] != null)
                {
                    cDataGroups[i] = labelFeatureDataArray[i].DataGroups.GroupCounts;
                }
            }

            //train/valid/test data partition
            labelFeatureData.DataGroups.PartitionData(cDataGroups);

            return labelFeatureData;
        }
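
A minimal standalone sketch (not part of the library) of the partition bookkeeping above: each input contributes its group count, a null valid or test set contributes zero, and the composite data set is then split back into contiguous train/valid/test ranges from those counts. The array contents below are hypothetical.

using System;

static class PartitionSketch
{
    static void Main()
    {
        // Hypothetical group counts; a null entry stands for a missing valid or test set.
        int?[] inputGroupCounts = { 800, 100, null };   // train, valid, test

        int cPartition = inputGroupCounts.Length;
        int[] cDataGroups = new int[cPartition];
        for (int i = 0; i < cPartition; i++)
            cDataGroups[i] = inputGroupCounts[i] ?? 0;

        // Each partition occupies a contiguous block of group indices in the composite.
        int offset = 0;
        for (int i = 0; i < cPartition; i++)
        {
            Console.WriteLine("partition {0}: groups [{1}, {2})", i, offset, offset + cDataGroups[i]);
            offset += cDataGroups[i];
        }
    }
}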
Example #3
        /// <summary>
        /// Constructor of BoostTreeNoGainLambdaLoss
        /// </summary>
        /// <param name="labelFeatureDataCoded">the coded input data used to train the boosted regression trees</param>
        /// <param name="learnRate">the learning rate specified for training</param>
        public BoostTreeNoGainLambdaLoss(LabelFeatureDataCoded labelFeatureDataCoded, LabelConverter labelConvert,
                                   float learnRate, float labelWeight, StepSizeType stepSizeType, FindStepLib fs,
                                   float labelForUnlabeled, double scoreForDegenerateQuery, int truncLevel)
        {
            this.learnRate = learnRate;
            this.labelWeight = labelWeight;

            this.labelForUnlabeled = labelForUnlabeled;
            this.scoreForDegenerateQuery = scoreForDegenerateQuery;
            this.truncLevel = truncLevel;

            this.labels = new float[labelFeatureDataCoded.NumDataPoint];

            for (int i = 0; i < labelFeatureDataCoded.NumDataPoint; i++)
            {
                this.labels[i] = labelConvert.convert(labelFeatureDataCoded.GetLabel(i));
            }

            this.numSamples = labels.Length;

            this.score = new float[this.numSamples];
            this.funValue = new float[this.numSamples];
            this.pseudoResponses = new float[this.numSamples];
            this.weights = new float[this.numSamples];

            this.labelFeatureDataCoded = labelFeatureDataCoded;

            //data members used to compute the optimal adjustment factor (step size) for the leaf node responses
            this.qcAccum = this.CreateQueryCollection();
            this.qcCurrent = this.CreateQueryCollection();
            this.fs = fs;
            this.stepSizeType = stepSizeType;
        }
Example #4
        /// <summary>
        /// Constructor of McBoostTreeLoss
        /// </summary>
        /// <param name="dp">the input data used to train boosted regression trees</param>
        /// <param name="learnRate">the input learning rate specified in training</param>
        public McBoostTreeLoss(LabelFeatureDataCoded labelFeatureDataCoded, LabelConverter labelConvert,
                               float learnRate)
        {
            this.learnRate = learnRate;

            this.numClass = 0;
            this.numSamples = labelFeatureDataCoded.NumDataPoint;
            this.classLabels = new int[labelFeatureDataCoded.NumDataPoint];
            for (int i = 0; i < this.numSamples; i++)
            {
                this.classLabels[i] = (int)labelConvert.convert(labelFeatureDataCoded.GetLabel(i));
                if (this.classLabels[i] + 1 > this.numClass)
                    this.numClass = this.classLabels[i] + 1;
            }

            //qiangwu: we probably don't need a matrix to store the label information, though it is more convenient coding-wise
            this.classLabelsMatrix = BulidClassLabelsMatrix(this.classLabels, this.numClass);

            this.classProb = new float[this.numClass][];
            this.classFunValue = new float[this.numClass][];
            this.pseudoResponses = new float[this.numClass][];
            this.weights = new float[this.numClass][];

            for (int k = 0; k < this.numClass; k++)
            {
                this.classProb[k] = new float[this.numSamples];
                this.classFunValue[k] = new float[this.numSamples];
                this.pseudoResponses[k] = new float[this.numSamples];
                this.weights[k] = new float[this.numSamples];
            }

            //float[] weightsByLabel = WeightsByLabel(this.classLabels);
            //float[] probWeights = new float[this.numSamples];
        }
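
A minimal standalone sketch (an assumption, not the library's code) of what BulidClassLabelsMatrix presumably produces: a per-class indicator matrix with classLabelsMatrix[k][i] equal to 1 when sample i carries class label k, in the same jagged [numClass][numSamples] layout used above for classProb, classFunValue, pseudoResponses, and weights.

using System;

static class ClassMatrixSketch
{
    // Build a jagged indicator matrix: matrix[k][i] == 1 iff classLabels[i] == k.
    static float[][] BuildClassLabelsMatrix(int[] classLabels, int numClass)
    {
        float[][] matrix = new float[numClass][];
        for (int k = 0; k < numClass; k++)
            matrix[k] = new float[classLabels.Length];
        for (int i = 0; i < classLabels.Length; i++)
            matrix[classLabels[i]][i] = 1.0F;
        return matrix;
    }

    static void Main()
    {
        int[] classLabels = { 0, 2, 1, 2 };               // hypothetical sample labels
        float[][] m = BuildClassLabelsMatrix(classLabels, 3);
        Console.WriteLine(m[2][1]);                       // 1: sample 1 belongs to class 2
    }
}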
Example #5
        public BoostTreeRegressionLoss(LabelFeatureDataCoded labelFeatureDataCoded, 
                                       float learnRate)
        {
            this.learnRate = learnRate;

            this.numSamples = labelFeatureDataCoded.NumDataPoint;
            this.labels = new float[labelFeatureDataCoded.NumDataPoint];

            //for (int i = 0; i < labelFeatureDataCoded.NumDataPoint; i++)
            //{
            //    this.labels[i] = labelConvert.convert(labelFeatureDataCoded.GetLabel(i));
            //}
            for (int i = 0; i < labelFeatureDataCoded.NumDataPoint; i++)
            {
                this.labels[i] = labelFeatureDataCoded.GetLabel(i);
            }

            this.scores = new float[this.numSamples];
            this.funValues = new float[this.numSamples];
            this.pseudoResponses = new float[this.numSamples];
            //this.weights = new float[this.numSamples];

            this.labelFeatureDataCoded = labelFeatureDataCoded;
        }
Example #6
 public BoostTreeRegressionL2Loss(LabelFeatureDataCoded labelFeatureDataCoded,
                                float learnRate)
     : base(labelFeatureDataCoded, learnRate)
 {
 }
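
The L2 subclass adds nothing beyond the base constructor. As a hedged aside (standard gradient boosting, not necessarily this library's exact pseudo-response implementation): with squared-error loss the pseudo-response of each sample is simply its residual.

// Illustration only: the negative gradient of 0.5 * (label - F)^2 with respect to F
// is the residual label - F, which becomes the regression target of the next tree.
static class L2LossSketch
{
    static void ComputePseudoResponses(float[] labels, float[] funValues, float[] pseudoResponses)
    {
        for (int i = 0; i < labels.Length; i++)
            pseudoResponses[i] = labels[i] - funValues[i];
    }
}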
Example #7
 ///Distributed version of PredictFunValue, for use with uncoded data. The data are
 ///guaranteed to have the same feature ordering because they are training data that
 ///were split across nodes. Do not use this function if the feature ordering is unknown.
 public void PredictFunValue(LabelFeatureDataCoded data, bool b, ref float[] funValue)
 {
     Debug.Assert(data.NumDataPoint == funValue.Length);
     for (int i = 0; i < data.NumDataPoint; i++)
     {
         funValue[i] = PredictFunValue(data.GetFeature(i));
     }
 }
Example #8
        public void PredictFunValue(LabelFeatureDataCoded data, ref float[] funValue)
        {
            for (int i = 0; i < funValue.Length; i++)
            {
                int node = 0;
                bool nextDataPoint = false;
                while (nextDataPoint == false)
                {
                    if (this.tree[node].isTerminal)
                    {
                        funValue[i] = this.tree[node].regionValue;
                        nextDataPoint = true;
                        continue;
                    }

                    if (data.GetFeatureCoded(this.tree[node].split, i) <= this.tree[node].splitValueCoded)
                        node = this.tree[node].leftChild;
                    else
                        node = this.tree[node].rightChild;
                }
            }
        }
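
A minimal standalone sketch of the traversal above using a plain node array; the field names mirror the snippet, but the node type and the coded feature representation here are illustrative assumptions.

using System;

struct TreeNode
{
    public bool isTerminal;
    public int split;              // index of the feature tested at this node
    public int splitValueCoded;    // quantized split threshold
    public int leftChild, rightChild;
    public float regionValue;      // prediction stored at a leaf
}

static class TreeWalkSketch
{
    static float Predict(TreeNode[] tree, int[] codedFeatures)
    {
        int node = 0;                                  // start at the root
        while (!tree[node].isTerminal)
        {
            node = codedFeatures[tree[node].split] <= tree[node].splitValueCoded
                 ? tree[node].leftChild
                 : tree[node].rightChild;
        }
        return tree[node].regionValue;
    }

    static void Main()
    {
        // A two-leaf stump: split on feature 0 at coded value 5.
        TreeNode[] tree =
        {
            new TreeNode { split = 0, splitValueCoded = 5, leftChild = 1, rightChild = 2 },
            new TreeNode { isTerminal = true, regionValue = -0.3F },
            new TreeNode { isTerminal = true, regionValue = 0.7F },
        };
        Console.WriteLine(Predict(tree, new[] { 3 }));   // -0.3
        Console.WriteLine(Predict(tree, new[] { 9 }));   // 0.7
    }
}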
Example #9
        public RegressionTree(LabelFeatureDataCoded labelFeatureDataCoded, BoostTreeLoss boostTreeLoss, int iTree, int[] workIndex,
                              RandomSampler featureSampler, RandomSampler dataSampler,
                              int maxTreeSize, int minNumSamples,
                              IFindSplit findSplit, TempSpace tempSpace)
        {
            this.labelFeatureDataCoded = labelFeatureDataCoded;
            this.workIndex = workIndex;
            this.numFeatures = labelFeatureDataCoded.NumFeatures;
            this.maxTreeSize = maxTreeSize;
            this.featureImportance = new float[this.numFeatures];
            this.minNumSamples = minNumSamples;

            //distributed setting
            this.adjustFactor = 1.0F;

            InitTempSpace(tempSpace);
            BuildRegressionTree(boostTreeLoss, iTree, findSplit, dataSampler, featureSampler);
            GC.Collect(); // hope for the best!!!
        }
Example #10
        public Split FindBestSplit(LabelFeatureDataCoded labelFeatureDataCoded, float[] responses,
                            int[] dataPoints, int[] workIndex, RandomSampler featureSampler,
                            RandomSampler dataSampler, int minNumSamples)
        {
            findSplitObj.SetData(labelFeatureDataCoded, responses,
                                 dataPoints, workIndex,
                                 0, featureSampler.SampleSize, featureSampler, dataSampler,
                                 minNumSamples);
            findSplitObj.Find();

            return findSplitObj.bestSplit;
        }
Example #11
        public void SetData(LabelFeatureDataCoded LabelFeatureDataCoded, float[] responses,
                            int[] dataPoints, int[] workIndex,
                            int iStart, int iEnd, RandomSampler featureSampler,
                            RandomSampler dataSampler, int minNumSamples)
        {
            this.LabelFeatureDataCoded = LabelFeatureDataCoded;
            this.responses = responses;

            this.dataPoints = dataPoints;
            this.workIndex = workIndex;

            this.iStart = iStart;
            this.iEnd = iEnd;
            this.featureSampler = featureSampler;

            this.dataSampler = dataSampler;

            this.minNumSamples = minNumSamples;
        }
Example #12
        public Split FindBestSplit(LabelFeatureDataCoded labelFeatureDataCoded, float[] responses,
                            int[] dataPoints, int[] workIndex, RandomSampler featureSampler,
                            RandomSampler dataSampler, int minNumSamples)
        {
            InitThreads();

            for (int i = 0; i < featureSampler.SampleSize; i++)
            {
                //wait for any of the threads to finish
                int iThread = WaitHandle.WaitAny(this.DoneEvents);
                DoneEvents[iThread].Reset();

                FindSplitObj_Thread threadObj = findSplitThreadObjList[iThread];

                //update the bestSplit given the result of the thread that just finished
                if (threadObj.bestSplit > bestSplit)
                {
                    threadObj.bestSplit.CopyTo(bestSplit);
                }

                //assign the data to the thread
                threadObj.SetData(labelFeatureDataCoded, responses,
                                 dataPoints, workIndex, i, i+1, featureSampler, dataSampler, minNumSamples);

                //set the thread into motion
                StartEvents[iThread].Set();
            }

            WaitHandle.WaitAll(DoneEvents);

            for (int i = 0; i < this.cThreads; i++)
            {
                FindSplitObj_Thread threadObj = findSplitThreadObjList[i];

                if (threadObj.bestSplit > bestSplit)
                {
                    threadObj.bestSplit.CopyTo(bestSplit);
                }
            }

            return bestSplit;
        }
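
A minimal standalone sketch (not the library's thread classes) of the event hand-off pattern above: each worker blocks on a start event, processes its assigned unit, and signals a done event; the dispatcher uses WaitHandle.WaitAny to hand the next unit to whichever worker finishes first, then WaitHandle.WaitAll to drain the remaining work. All names below are hypothetical.

using System;
using System.Threading;

static class WorkerPoolSketch
{
    static volatile bool stop;

    static void Main()
    {
        const int cThreads = 2;
        const int cUnits = 6;
        var startEvents = new AutoResetEvent[cThreads];
        var doneEvents = new ManualResetEvent[cThreads];
        int[] assignedUnit = new int[cThreads];

        for (int t = 0; t < cThreads; t++)
        {
            int id = t;
            startEvents[id] = new AutoResetEvent(false);
            doneEvents[id] = new ManualResetEvent(true);      // workers start out idle
            new Thread(() =>
            {
                while (true)
                {
                    startEvents[id].WaitOne();                // wait for an assignment
                    if (stop) break;
                    Console.WriteLine("thread {0} processed unit {1}", id, assignedUnit[id]);
                    doneEvents[id].Set();                     // report completion
                }
            }) { IsBackground = true }.Start();
        }

        for (int unit = 0; unit < cUnits; unit++)
        {
            int idle = WaitHandle.WaitAny(doneEvents);        // pick a finished worker
            doneEvents[idle].Reset();
            assignedUnit[idle] = unit;                        // hand over the next unit
            startEvents[idle].Set();
        }
        WaitHandle.WaitAll(doneEvents);                       // wait for the last units

        stop = true;
        foreach (var e in startEvents) e.Set();               // release workers so they exit
    }
}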