/// <summary>
/// Initializes a BoostTree from coded label/feature data.
/// The data are preprocessed (quantized) to avoid sorting.
/// </summary>
/// <param name="labelFeatureDataCoded">coded data; also supplies the feature names</param>
/// <param name="subModelScore">stored as the sub-model score data</param>
/// <param name="subModel">stored as the sub-model</param>
/// <param name="boostTreeLoss">stored as the loss object</param>
/// <param name="saveTreeBinFile">stored as the binary tree file name</param>
/// <param name="saveTreeTextFile">stored as the text tree file name; the XML file name is this value with ".xml" appended</param>
public BoostTree(LabelFeatureDataCoded labelFeatureDataCoded, LabelFeatureData subModelScore, Model subModel, BoostTreeLoss boostTreeLoss, string saveTreeBinFile, string saveTreeTextFile)
{
    // Both data members are assigned before UnpackData() is invoked.
    // NOTE(review): UnpackData presumably reads them - confirm before reordering.
    this.labelFeatureDataCoded = labelFeatureDataCoded;
    this.subModelScore = subModelScore;
    UnpackData();

    this.subModel = subModel;
    this.boostTreeLoss = boostTreeLoss;
    this.featureNames = labelFeatureDataCoded.FeatureNames;

    // Output locations; the XML name is derived from the text file name.
    this.saveTreeBinFile = saveTreeBinFile;
    this.saveTreeTextFile = saveTreeTextFile;
    this.saveTreeXmlFile = saveTreeTextFile + ".xml";
}
/// <summary>
/// Builds a composite over the train, validation and test data sets.
/// The first one is always the train data, which is always coded.
/// </summary>
/// <param name="trainLabelFeatureDataCoded">coded train data; when null, nothing is created</param>
/// <param name="validLabelFeatureData">validation data; may be null</param>
/// <param name="testLabelFeatureData">test data; may be null</param>
/// <returns>
/// null when <paramref name="trainLabelFeatureDataCoded"/> is null; otherwise the composite,
/// after its data groups have been partitioned by the per-set group counts.
/// </returns>
public static CLabelFeatureDataCodedComposite Create(LabelFeatureDataCoded trainLabelFeatureDataCoded, LabelFeatureData validLabelFeatureData, LabelFeatureData testLabelFeatureData)
{
    if (trainLabelFeatureDataCoded == null)
    {
        return null;
    }

    int cPartition = (int)DataPartitionType.cTypes;

    // Fixed order: train, valid, test.
    LabelFeatureData[] labelFeatureDataArray = new LabelFeatureData[]
    {
        trainLabelFeatureDataCoded,
        validLabelFeatureData,
        testLabelFeatureData
    };

    CLabelFeatureDataCodedComposite labelFeatureData = new CLabelFeatureDataCodedComposite(labelFeatureDataArray);

    // A missing (null) data set contributes zero groups.
    int[] cDataGroups = new int[cPartition];
    for (int iPartition = 0; iPartition < cPartition; iPartition++)
    {
        LabelFeatureData part = labelFeatureDataArray[iPartition];
        cDataGroups[iPartition] = (part != null) ? part.DataGroups.GroupCounts : 0;
    }

    // train/valid/test data partition
    labelFeatureData.DataGroups.PartitionData(cDataGroups);

    return labelFeatureData;
}
/// <summary>
/// Constructor of BoostTreeNoGainLambdaLoss
/// </summary>
/// <param name="labelFeatureDataCoded">the input data; every label is read from it once and the reference is kept</param>
/// <param name="labelConvert">applied to each raw label before it is stored</param>
/// <param name="learnRate">the input learning rate specified in training</param>
/// <param name="labelWeight">stored as given</param>
/// <param name="stepSizeType">stored as given</param>
/// <param name="fs">stored as given</param>
/// <param name="labelForUnlabeled">stored as given</param>
/// <param name="scoreForDegenerateQuery">stored as given</param>
/// <param name="truncLevel">stored as given</param>
public BoostTreeNoGainLambdaLoss(LabelFeatureDataCoded labelFeatureDataCoded, LabelConverter labelConvert, float learnRate, float labelWeight, StepSizeType stepSizeType, FindStepLib fs, float labelForUnlabeled, double scoreForDegenerateQuery, int truncLevel)
{
    // Scalar settings.
    this.learnRate = learnRate;
    this.labelWeight = labelWeight;
    this.labelForUnlabeled = labelForUnlabeled;
    this.scoreForDegenerateQuery = scoreForDegenerateQuery;
    this.truncLevel = truncLevel;

    // Converted labels, one per data point.
    int cDataPoints = labelFeatureDataCoded.NumDataPoint;
    this.labels = new float[cDataPoints];
    for (int iData = 0; iData < cDataPoints; iData++)
    {
        float rawLabel = labelFeatureDataCoded.GetLabel(iData);
        this.labels[iData] = labelConvert.convert(rawLabel);
    }

    // Per-sample work buffers, all sized to the number of labels.
    this.numSamples = this.labels.Length;
    this.score = new float[this.numSamples];
    this.funValue = new float[this.numSamples];
    this.pseudoResponses = new float[this.numSamples];
    this.weights = new float[this.numSamples];

    this.labelFeatureDataCoded = labelFeatureDataCoded;

    // data member to compute the optimal adjustment factor (step size) for leaf nodes response
    // NOTE(review): CreateQueryCollection runs after the members above are set and before
    // fs/stepSizeType are set; keep this order unless its dependencies are confirmed.
    this.qcAccum = this.CreateQueryCollection();
    this.qcCurrent = this.CreateQueryCollection();

    this.fs = fs;
    this.stepSizeType = stepSizeType;
}
/// <summary>
/// Constructor of McBoostTreeLoss
/// </summary>
/// <param name="labelFeatureDataCoded">the input data; supplies the number of data points and their labels</param>
/// <param name="labelConvert">applied to each raw label; the result is truncated to an int class label</param>
/// <param name="learnRate">the input learning rate specified in training</param>
public McBoostTreeLoss(LabelFeatureDataCoded labelFeatureDataCoded, LabelConverter labelConvert, float learnRate)
{
    this.learnRate = learnRate;

    this.numSamples = labelFeatureDataCoded.NumDataPoint;
    this.classLabels = new int[labelFeatureDataCoded.NumDataPoint];

    // The class count ends up as (largest class label seen + 1), or 0 if no label is >= 0.
    this.numClass = 0;
    for (int iSample = 0; iSample < this.numSamples; iSample++)
    {
        int classLabel = (int)labelConvert.convert(labelFeatureDataCoded.GetLabel(iSample));
        this.classLabels[iSample] = classLabel;

        if (classLabel + 1 > this.numClass)
        {
            this.numClass = classLabel + 1;
        }
    }

    //qiangwu: we probably don't need a matrix to store the label information though it is more convenient coding-wise
    this.classLabelsMatrix = BulidClassLabelsMatrix(this.classLabels, this.numClass);

    // One row per class, one column per sample.
    this.classProb = new float[this.numClass][];
    this.classFunValue = new float[this.numClass][];
    this.pseudoResponses = new float[this.numClass][];
    this.weights = new float[this.numClass][];

    for (int iClass = 0; iClass < this.numClass; iClass++)
    {
        this.classProb[iClass] = new float[this.numSamples];
        this.classFunValue[iClass] = new float[this.numSamples];
        this.pseudoResponses[iClass] = new float[this.numSamples];
        this.weights[iClass] = new float[this.numSamples];
    }
}
/// <summary>
/// Constructor of BoostTreeRegressionLoss
/// </summary>
/// <param name="labelFeatureDataCoded">the input data; its labels are copied and the reference is kept</param>
/// <param name="learnRate">the input learning rate specified in training</param>
public BoostTreeRegressionLoss(LabelFeatureDataCoded labelFeatureDataCoded, float learnRate)
{
    this.learnRate = learnRate;

    int cDataPoints = labelFeatureDataCoded.NumDataPoint;
    this.numSamples = cDataPoints;

    // Labels are copied verbatim from the data; no label conversion is applied here.
    this.labels = new float[cDataPoints];
    for (int iData = 0; iData < cDataPoints; iData++)
    {
        this.labels[iData] = labelFeatureDataCoded.GetLabel(iData);
    }

    // Per-sample work buffers (no weights buffer is allocated by this constructor).
    this.scores = new float[this.numSamples];
    this.funValues = new float[this.numSamples];
    this.pseudoResponses = new float[this.numSamples];

    this.labelFeatureDataCoded = labelFeatureDataCoded;
}
/// <summary>
/// Constructor of BoostTreeRegressionL2Loss; it does nothing beyond forwarding
/// both arguments unchanged to the base-class constructor.
/// </summary>
/// <param name="labelFeatureDataCoded">passed through to the base constructor</param>
/// <param name="learnRate">passed through to the base constructor</param>
public BoostTreeRegressionL2Loss(LabelFeatureDataCoded labelFeatureDataCoded, float learnRate)
    : base(labelFeatureDataCoded, learnRate)
{
}
/// <summary>
/// Distributed version of the function-value prediction: each data point is scored
/// through the single-point PredictFunValue overload using its feature row.
/// The data is expected to have the same feature ordering as the training data
/// (it is training data that was split across nodes). Do not use this function if
/// the ordering of the features is unknown.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment describes this as being for "uncoded data",
/// yet the parameter type is LabelFeatureDataCoded - confirm the intended wording.
/// </remarks>
/// <param name="data">the data whose points are scored</param>
/// <param name="b">not read by this method; presumably only distinguishes this overload - confirm</param>
/// <param name="funValue">receives one predicted value per data point; expected to have data.NumDataPoint entries</param>
public void PredictFunValue(LabelFeatureDataCoded data, bool b, ref float[] funValue)
{
    Debug.Assert(data.NumDataPoint == funValue.Length);

    int cDataPoints = data.NumDataPoint;
    for (int iData = 0; iData < cDataPoints; iData++)
    {
        funValue[iData] = PredictFunValue(data.GetFeature(iData));
    }
}
/// <summary>
/// Predicts the function value of every data point by walking the tree on the coded features.
/// Starting at node 0, a point moves to the left child when its coded value of the node's
/// split feature is less than or equal to the node's coded split value, and to the right
/// child otherwise, until a terminal node is reached.
/// </summary>
/// <param name="data">the coded data to score</param>
/// <param name="funValue">receives the terminal node's region value for each point; its length sets how many points are scored</param>
public void PredictFunValue(LabelFeatureDataCoded data, ref float[] funValue)
{
    for (int iData = 0; iData < funValue.Length; iData++)
    {
        int node = 0;

        // Descend until a leaf is hit.
        while (!this.tree[node].isTerminal)
        {
            bool goLeft = data.GetFeatureCoded(this.tree[node].split, iData) <= this.tree[node].splitValueCoded;
            node = goLeft ? this.tree[node].leftChild : this.tree[node].rightChild;
        }

        funValue[iData] = this.tree[node].regionValue;
    }
}
/// <summary>
/// Constructor of RegressionTree: stores the inputs, initializes the temporary
/// work space and then builds the tree right away.
/// </summary>
/// <param name="labelFeatureDataCoded">the coded data; also supplies the number of features</param>
/// <param name="boostTreeLoss">passed to BuildRegressionTree</param>
/// <param name="iTree">passed to BuildRegressionTree</param>
/// <param name="workIndex">stored as given</param>
/// <param name="featureSampler">passed to BuildRegressionTree</param>
/// <param name="dataSampler">passed to BuildRegressionTree</param>
/// <param name="maxTreeSize">stored as given</param>
/// <param name="minNumSamples">stored as given</param>
/// <param name="findSplit">passed to BuildRegressionTree</param>
/// <param name="tempSpace">passed to InitTempSpace</param>
public RegressionTree(LabelFeatureDataCoded labelFeatureDataCoded, BoostTreeLoss boostTreeLoss, int iTree, int[] workIndex, RandomSampler featureSampler, RandomSampler dataSampler, int maxTreeSize, int minNumSamples, IFindSplit findSplit, TempSpace tempSpace)
{
    this.labelFeatureDataCoded = labelFeatureDataCoded;
    this.workIndex = workIndex;

    this.numFeatures = labelFeatureDataCoded.NumFeatures;
    this.featureImportance = new float[this.numFeatures];

    this.maxTreeSize = maxTreeSize;
    this.minNumSamples = minNumSamples;

    //distributed setting
    this.adjustFactor = 1.0F;

    InitTempSpace(tempSpace);
    BuildRegressionTree(boostTreeLoss, iTree, findSplit, dataSampler, featureSampler);

    // Explicit collection once the tree has been built (original note: "hope for the best!!!").
    GC.Collect();
}
/// <summary>
/// Finds the best split by handing the whole job to the single find-split object:
/// the inputs are set with the range 0 .. featureSampler.SampleSize, Find() is run,
/// and the object's bestSplit is returned.
/// </summary>
/// <returns>the bestSplit held by the find-split object after Find() has run</returns>
public Split FindBestSplit(LabelFeatureDataCoded labelFeatureDataCoded, float[] responses, int[] dataPoints, int[] workIndex, RandomSampler featureSampler, RandomSampler dataSampler, int minNumSamples)
{
    // NOTE(review): iStart/iEnd presumably delimit sampled-feature indices - confirm in Find().
    int iStart = 0;
    int iEnd = featureSampler.SampleSize;

    findSplitObj.SetData(
        labelFeatureDataCoded,
        responses,
        dataPoints,
        workIndex,
        iStart,
        iEnd,
        featureSampler,
        dataSampler,
        minNumSamples);

    findSplitObj.Find();

    return findSplitObj.bestSplit;
}
/// <summary>
/// Stores every argument in the member of the same name; nothing else is done here.
/// </summary>
public void SetData(LabelFeatureDataCoded LabelFeatureDataCoded, float[] responses, int[] dataPoints, int[] workIndex, int iStart, int iEnd, RandomSampler featureSampler, RandomSampler dataSampler, int minNumSamples)
{
    // Data and index arrays.
    this.LabelFeatureDataCoded = LabelFeatureDataCoded;
    this.responses = responses;
    this.dataPoints = dataPoints;
    this.workIndex = workIndex;

    // Range assigned to this object.
    this.iStart = iStart;
    this.iEnd = iEnd;

    // Samplers and the minimum sample count.
    this.featureSampler = featureSampler;
    this.dataSampler = dataSampler;
    this.minNumSamples = minNumSamples;
}
/// <summary>
/// Finds the best split using the worker thread objects. One unit of work, the range
/// [i, i+1), is dispatched per value of i below featureSampler.SampleSize; each unit goes
/// to whichever worker signals its done event first. A worker's bestSplit is folded into
/// this object's bestSplit just before the worker is given new work, and once more for
/// every worker after all of them have finished.
/// </summary>
/// <returns>this object's bestSplit after all worker results have been folded in</returns>
public Split FindBestSplit(LabelFeatureDataCoded labelFeatureDataCoded, float[] responses, int[] dataPoints, int[] workIndex, RandomSampler featureSampler, RandomSampler dataSampler, int minNumSamples)
{
    InitThreads();

    int cWorkItems = featureSampler.SampleSize;
    for (int iWork = 0; iWork < cWorkItems; iWork++)
    {
        // wait for any of the threads to finish, then clear its done event
        int iFree = WaitHandle.WaitAny(this.DoneEvents);
        DoneEvents[iFree].Reset();

        FindSplitObj_Thread worker = findSplitThreadObjList[iFree];

        // update the bestSplit given the result of the just finished thread
        if (worker.bestSplit > bestSplit)
        {
            worker.bestSplit.CopyTo(bestSplit);
        }

        // assign the data to the thread
        worker.SetData(labelFeatureDataCoded, responses, dataPoints, workIndex, iWork, iWork + 1, featureSampler, dataSampler, minNumSamples);

        // set the thread into motion
        StartEvents[iFree].Set();
    }

    // Block until every worker has signalled completion.
    WaitHandle.WaitAll(DoneEvents);

    // Final sweep: fold in the result of each worker's last unit of work.
    for (int iThread = 0; iThread < this.cThreads; iThread++)
    {
        FindSplitObj_Thread worker = findSplitThreadObjList[iThread];
        if (worker.bestSplit > bestSplit)
        {
            worker.bestSplit.CopyTo(bestSplit);
        }
    }

    return bestSplit;
}