Example #1
        // The data are preprocessed (quantized) so that tree construction does not have to sort feature values
        public BoostTree(LabelFeatureDataCoded labelFeatureDataCoded, LabelFeatureData subModelScore,
                        Model subModel, BoostTreeLoss boostTreeLoss,
                        string saveTreeBinFile, string saveTreeTextFile)
        {
            this.labelFeatureDataCoded = labelFeatureDataCoded;
            this.subModelScore = subModelScore;

            UnpackData();

            this.subModel = subModel;
            this.boostTreeLoss = boostTreeLoss;
            this.featureNames = labelFeatureDataCoded.FeatureNames;

            this.saveTreeBinFile = saveTreeBinFile;
            this.saveTreeTextFile = saveTreeTextFile;
            this.saveTreeXmlFile = saveTreeTextFile + ".xml";
        }
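
A minimal usage sketch, not from the source: train is an already-coded training set, valid and test are optional plain sets, and loss stands in for whichever BoostTreeLoss implementation the trainer is configured with; the Create factory is shown in Example #3.

        // Hypothetical wiring; train, valid, test, and loss are assumptions.
        CLabelFeatureDataCodedComposite data =
            CLabelFeatureDataCodedComposite.Create(train, valid, test);
        BoostTree boostTree = new BoostTree(
            data,              // quantized data: no per-split sorting needed
            null,              // no pre-computed sub-model scores
            null,              // no sub-model: the loss falls back to its defaults
            loss,
            "boostTree.bin",   // binary tree dump
            "boostTree.txt");  // text dump; the xml path is derived as "boostTree.txt.xml"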
Example #2
 private CLabelFeatureDataCodedComposite(LabelFeatureData[] labelFeatureDataElements)
     :
     base(labelFeatureDataElements)
 {
     //the LabelFeatureDataCoded object is always the first one in the input array
     this.labelFeatureDataCoded = (LabelFeatureDataCoded)labelFeatureDataElements[0];
 }
Example #3
        //the first element is always the training data, which is always coded
        public static CLabelFeatureDataCodedComposite Create(LabelFeatureDataCoded trainLabelFeatureDataCoded, LabelFeatureData validLabelFeatureData, LabelFeatureData testLabelFeatureData)         
        {
            if(trainLabelFeatureDataCoded == null)
            {
                return null;
            }

            int cPartition = (int)DataPartitionType.cTypes;

            List<LabelFeatureData> listLabelFeatureData = new List<LabelFeatureData>(cPartition);                        
            listLabelFeatureData.Add(trainLabelFeatureDataCoded);            
            listLabelFeatureData.Add(validLabelFeatureData);       
            listLabelFeatureData.Add(testLabelFeatureData);            
            LabelFeatureData[] labelFeatureDataArray = listLabelFeatureData.ToArray();

            CLabelFeatureDataCodedComposite labelFeatureData = new CLabelFeatureDataCodedComposite(labelFeatureDataArray);
           
            int[] cDataGroups = new int[cPartition];
            for (int i = 0; i < cPartition; i++)
            {
                cDataGroups[i] = 0;
                if (labelFeatureDataArray[i] != null)
                {
                    cDataGroups[i] = labelFeatureDataArray[i].DataGroups.GroupCounts;
                }
            }

            //train/valid/test data partition
            labelFeatureData.DataGroups.PartitionData(cDataGroups);

            return labelFeatureData;
        }
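
The null guard inside the group-count loop can be written more compactly; a behavior-preserving sketch of the same logic:

            //behavior-preserving rewrite of the group-count loop above
            for (int i = 0; i < cPartition; i++)
            {
                cDataGroups[i] = (labelFeatureDataArray[i] == null)
                    ? 0
                    : labelFeatureDataArray[i].DataGroups.GroupCounts;
            }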
Example #4
        /// <summary>
        /// Construct a single uniform LabelFeatureData from an array of such data objects.
        /// Features: the features of the unified object are the same as the features of the first object in the input array.
        ///           If a feature is missing from a subsequent object, its value is set to zero.
        /// DataPoints/index: the data points are aggregated and indexed in the same order as the input object array.
        /// DataGroups/index: the data groups are likewise aggregated and indexed in the same order as the input object array.
        /// </summary>
        /// <param name="labelFeatureDataElements">the data objects to combine; the first element defines the feature set</param>
        public CLabelFeatureDataComposite(LabelFeatureData[] labelFeatureDataElements)
        {
            this.labelFeatureDataElements = labelFeatureDataElements;
            
            //the number of data points is the sum over the input object array
            numDataPoint = 0;
            for (int i = 0; i < this.labelFeatureDataElements.Length; i++)
            {
                if (this.labelFeatureDataElements[i] != null)
                {
                    this.numDataPoint += this.labelFeatureDataElements[i].NumDataPoint;
                }                
            }
                        
            //featureNames is set to the feature names of the first element
            this.featureNames = this.labelFeatureDataElements[0].FeatureNames;

            //index maps: translate a feature id of the unified object into the corresponding feature id of each original input object
            this.idxMaps = new int[labelFeatureDataElements.Length][];
            this.accGroups = new int[labelFeatureDataElements.Length];
            for (int i = 0; i < this.labelFeatureDataElements.Length; i++)
            {
                this.idxMaps[i] = null;
                if (this.labelFeatureDataElements[i] != null)
                {
                    this.idxMaps[i] = new int[this.featureNames.Length];
                    for (int j = 0; j < this.featureNames.Length; j++)
                    {
                        string name = this.featureNames[j];
                        this.idxMaps[i][j] = -1;
                        int k = 0;
                        for (k = 0; k < this.labelFeatureDataElements[i].FeatureNames.Length; k++)
                        {
                            if (string.Compare(name, this.labelFeatureDataElements[i].FeatureNames[k], true) == 0)
                            {
                                this.idxMaps[i][j] = k;
                                break;
                            }
                        }

                        if (k >= this.labelFeatureDataElements[i].FeatureNames.Length)
                        {
                            Console.WriteLine("Feature " + name + " does not exist");
                        }
                    }
                }

                //accumulated group counts over elements 0..i
                this.accGroups[i] = ((i == 0) ? 0 : this.accGroups[i - 1])
                    + ((this.labelFeatureDataElements[i] == null) ? 0 : this.labelFeatureDataElements[i].DataGroups.GroupCounts);
            }       
        }
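
The name-to-index mapping above is the heart of the composite: each unified feature name is looked up, case-insensitively, in every element's feature list, with -1 marking a missing feature. A self-contained sketch of just that mapping (BuildIdxMap and the toy feature names are illustrative, not from the source):

using System;

class IdxMapDemo
{
    //illustrative re-statement of the idxMaps construction above
    static int[] BuildIdxMap(string[] unifiedNames, string[] elementNames)
    {
        int[] map = new int[unifiedNames.Length];
        for (int j = 0; j < unifiedNames.Length; j++)
        {
            map[j] = Array.FindIndex(elementNames,
                n => string.Compare(unifiedNames[j], n, true) == 0);
            if (map[j] < 0)
                Console.WriteLine("Feature " + unifiedNames[j] + " does not exist");
        }
        return map;
    }

    static void Main()
    {
        int[] map = BuildIdxMap(new string[] { "BM25", "PageRank" },
                                new string[] { "pagerank", "bm25" });
        Console.WriteLine(string.Join(",", map)); //prints 1,0: lookups are case-insensitive
    }
}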
Example #5
        public TopNSet(LabelFeatureData subModelScore, int N)
        {
            this.Datakept = new bool[subModelScore.NumDataPoint];
            for (int i = 0; i < subModelScore.DataGroups.GroupCounts; i++)
            {
                DataGroup dataGroup = subModelScore.DataGroups[i];
                float[] scores = new float[dataGroup.cSize];
                int[] idx = new int[dataGroup.cSize];

                for (int j = 0; j < dataGroup.cSize; j++)
                {
                    this.Datakept[dataGroup.iStart + j] = false;
                    float[] features = subModelScore.GetFeature(dataGroup.iStart + j);
                    scores[j] = 0 - features[0]; //negated: Array.Sort is ascending, so the top scores sort first
                    idx[j] = j;
                }

                Array.Sort(scores, idx);
                for (int j = 0; j < dataGroup.cSize; j++)
                {
                    if (j >= N)
                    {
                        break;
                    }
                    this.Datakept[dataGroup.iStart+idx[j]] = true;
                }
            }
        }
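
The negation of the scores is what makes Array.Sort, which sorts ascending, produce the indices of the highest-scoring documents first. A tiny self-contained illustration:

using System;

class SortDemo
{
    static void Main()
    {
        float[] negScores = { -0.9f, -0.1f, -0.5f }; //original scores 0.9, 0.1, 0.5
        int[] idx = { 0, 1, 2 };
        Array.Sort(negScores, idx);                  //sorts keys ascending, permuting idx alongside
        Console.WriteLine(string.Join(",", idx));    //prints 0,2,1: best-scoring indices first
    }
}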
Example #6
 /// <summary>
 /// Compute and store the scores of the input data given a model
 /// </summary>
 /// <param name="model">the model to be evaluated</param>
 /// <param name="labelFeatureData">input data</param>        
 /// <param name="subModelScore">pre-computed scores for the input data</param>    
 public void ModelEval(Model model, LabelFeatureData labelFeatureData, LabelFeatureData subModelScore)
 {
     if (subModelScore != null)
     {
         Debug.Assert(this.numSamples == subModelScore.NumDataPoint);
         for (int i = 0; i < this.numSamples; i++)
         {
             this.score[i] = subModelScore.GetFeature(0, i);
         }
     }
     else if (model != null && labelFeatureData != null)
     {
         Debug.Assert(this.numSamples == labelFeatureData.NumDataPoint);
         float[] scores = new float[1];
         for (int i = 0; i < this.numSamples; i++)
         {
             model.Evaluate(labelFeatureData.GetFeature(i), scores);
             this.score[i] = scores[0];
         }
     }
     else
     {
         for (int i = 0; i < this.numSamples; i++)
         {
             this.score[i] = 0.0F;
         }
     }
 }
Example #7
 /// <summary>
 /// Compute and store the scores of the input data given a model
 /// </summary>
 /// <param name="model">the model to be evaluated</param>
 /// <param name="labelFeatureData">input data</param>        
 /// <param name="subModelScore">pre-computed scores for the input data</param>    
 public void ModelEval(Model model, LabelFeatureData labelFeatureData, LabelFeatureData subModelScore)
 {
     if (subModelScore != null)
     {
         Debug.Assert(this.numSamples == subModelScore.NumDataPoint);
         for (int i = 0; i < this.numSamples; i++)
         {
             for (int k = 0; k < this.numClass; k++)
             {
                 this.ModelScores[k][i] = subModelScore.GetFeature(k, i);
             }
         }
     }
     else if (model != null && labelFeatureData != null) // REVIEW by CJCB: this was 'if', I think it should be 'else if' so I changed it
     {
         Debug.Assert(this.numSamples == labelFeatureData.NumDataPoint);
         float[] scores = new float[this.numClass];
         for (int i = 0; i < this.numSamples; i++)
         {
             model.Evaluate(labelFeatureData.GetFeature(i), scores);
             for (int k = 0; k < this.numClass; k++)
             {
                 this.ModelScores[k][i] = scores[k];
             }
         }
     }
     else
     {
          //TODO: qiangwu - should we always require subModel to exist,
          //to make the default model value computation explicit?
          //It is probably safer that way; revisit the issue later.
         float score = (float)1.0 / (float)this.numClass;
         for (int i = 0; i < this.numSamples; i++)
         {
             for (int k = 0; k < this.numClass; k++)
             {
                 this.ModelScores[k][i] = score;
             }
         }
     }
 }
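
The branch order encodes a precedence: pre-computed scores win over the sub-model, which wins over the uniform default of 1/numClass. A hypothetical call sketch (loss, model, data, and cached are assumptions):

     // Hypothetical calls; loss, model, data, and cached are assumptions.
     loss.ModelEval(model, data, cached); //cached != null: scores copied from the cached features
     loss.ModelEval(model, data, null);   //model evaluated once per data point
     loss.ModelEval(null, null, null);    //uniform default: 1/numClass for every class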
Example #8
 public float[][] Predict(LabelFeatureData testData)
 {
     return Predict(testData, this.optIter);
 }
Example #9
        public float[][] Predict(LabelFeatureData testData, int numIter)
        {
            Predict(testData, numIter, this.boostTreeLoss, null, false);

            return this.boostTreeLoss.ModelScores;
        }
Example #10
 public void Predict(LabelFeatureData testData, int numIter, Metrics metrics, bool silent)
 {
     Predict(testData, numIter, this.boostTreeLoss, metrics, silent);
 }
Example #11
 public void Predict(LabelFeatureData testData, int numIter, Metrics metrics)
 {
     Predict(testData, numIter, this.boostTreeLoss, metrics, false);
 }
Example #12
 public void Predict(LabelFeatureData testData, int numIter, BoostTreeLoss boostTreeLoss)
 {
     Predict(testData, numIter, boostTreeLoss, null, false);
 }
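
All of the convenience overloads in Examples #8 through #12 funnel into the five-argument Predict of Example #13. A hypothetical call sketch (boostTree, testData, and metrics are assumptions):

 // Hypothetical calls; boostTree, testData, and metrics are assumptions.
 float[][] scores = boostTree.Predict(testData);  //uses the optimal iteration count
 boostTree.Predict(testData, 100, metrics);       //100 iterations, metrics reported per iteration
 boostTree.Predict(testData, 100, metrics, true); //silent: only the last iteration is printed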
Example #13
        public void Predict(LabelFeatureData labelFeatureData, int numIter,
                            BoostTreeLoss boostTreeLoss,
                            Metrics metrics, //if set, error metrics are computed and reported per iteration
                            bool silent // if true, only report results for the last iteration
                            )
        {
            if (numIter > this.TotalIter)
                numIter = this.TotalIter;

            boostTreeLoss.Reset(labelFeatureData.NumDataPoint);

            //(1) compute the probabilities produced by the sub-model
            boostTreeLoss.ModelEval(this.subModel, labelFeatureData, null);

            //(2) compute the corresponding function values;
            boostTreeLoss.ModelScoresToFuncValues();

            if (metrics != null)
            {
                metrics.ComputeMetrics(boostTreeLoss.ModelScores, 0, this.optIter == 0);
#if VERBOSE
                Console.WriteLine(metrics.ResultsHeaderStr());
                Console.WriteLine(metrics.ResultsStr(0));
#endif
            }

            //(3) accumulate the function values for each boosted regression tree
            int numSamples = labelFeatureData.NumDataPoint;
            float[] funValueGain = new float[numSamples];

#if GET_PER_DOC_PER_ITER_SCORES
            float[][] saveScores = ArrayUtils.FloatMatrix(numIter+2, labelFeatureData.NumDataPoint); // We will take transpose when we print
            for (int i = 0; i < labelFeatureData.NumDataPoint; ++i)
            {
                saveScores[0][i] = labelFeatureData.GetGroupId(i);
                saveScores[1][i] = labelFeatureData.GetLabel(i);
            }
#endif

            for (int m = 0; m < numIter; m++)
            {
                // fit a residual model (regression trees) to the pseudo-response
                // to compensate for the error of the current system
                for (int k = 0; k < boostTreeLoss.NumTreesPerIteration; k++)
                {
                    if (this.regressionTrees[m, 0] == null) //no trees were fitted at this iteration
                        break;
#if GET_PER_DOC_PER_ITER_SCORES
                    this.regressionTrees[m, k].PredictFunValueNKeepScores(labelFeatureData, this.Train2TestIdx, funValueGain, saveScores[m+2]);
#else
                    this.regressionTrees[m, k].PredictFunValue(labelFeatureData, this.Train2TestIdx, funValueGain);
#endif
                    boostTreeLoss.AccFuncValueGain(funValueGain, 1.0f, k);
                }


                if (metrics != null)
                {
                    //compute the metrics of the current system
                    boostTreeLoss.FuncValuesToModelScores();
                    metrics.ComputeMetrics(boostTreeLoss.ModelScores, m + 1, this.optIter == m + 1);
                    if (m == numIter - 1 || !silent)
                        Console.WriteLine(metrics.ResultsStr(m + 1));
                }
            }

#if GET_PER_DOC_PER_ITER_SCORES
            using (StreamWriter sw = new StreamWriter("allScores.tsv"))
            {
                sw.Write("m:QueryID\tm:Rating"); // Write the header (with no tab at the end!)
                for (int j = 1; j < numIter+1; ++j)
                    sw.Write("\tFtr_" + j.ToString("0000"));
                sw.WriteLine();
                for (int j = 0; j < labelFeatureData.NumDataPoint; ++j)
                {
                    sw.Write("{0}\t{1}", saveScores[0][j], saveScores[1][j]); // Write the query ID and label
                    for (int m = 2; m < numIter + 2; ++m)
                        sw.Write("\t{0:G6}", saveScores[m][j]);
                    sw.WriteLine();
                }
            }
#endif

            if (metrics == null)
            {
                //without metrics, FuncValuesToModelScores was never called inside the loop, so convert once here
                boostTreeLoss.FuncValuesToModelScores();
            }
            else
                metrics.SaveScores("DataScores.txt", boostTreeLoss.ModelScores);
        }
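
Since Example #9 returns boostTreeLoss.ModelScores and Example #7 writes ModelScores[k][i], the array handed back by Predict is indexed class-first. A hypothetical sketch of reading it (boostTree and testData are assumptions):

        // Hypothetical read-out; boostTree and testData are assumptions.
        float[][] modelScores = boostTree.Predict(testData, 200);
        for (int i = 0; i < testData.NumDataPoint; i++)
        {
            Console.WriteLine(modelScores[0][i]); //modelScores[k][i]: class k, data point i
        }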
Example #14
 //dataTypes == null <=> all the data are used in one partition
 public PrecRecall(LabelFeatureData labelFeatureData, LabelConverter labelConvert, DataPartitionType[] dataTypes)
     : base(labelFeatureData, labelConvert, dataTypes)
 {
     this.stats = new int[2, 2];
 }
Example #15
 public void PredictFunValueNKeepScores(LabelFeatureData data, int[] Train2TestIdx, float[] funValue, float[] keepScores)
 {
     Debug.Assert(data.NumDataPoint == funValue.Length);
     for (int i = 0; i < data.NumDataPoint; i++)
     {
         funValue[i] = PredictFunValue(data.GetFeature(i), Train2TestIdx);
         keepScores[i] = funValue[i];
     }
 }
Example #16
        public void PredictFunValue(LabelFeatureData data, int[] Train2TestIdx, float[] funValue)
        {
            Debug.Assert(data.NumDataPoint == funValue.Length);
            for (int i = 0; i < data.NumDataPoint; i++)
            {
                funValue[i] = PredictFunValue(data.GetFeature(i), Train2TestIdx);
            }
        }
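
A sketch of the node-by-node walk that PredictFunValue effectively performs for each data point. The field names tree, isTerminal, split, splitValue, leftChild, rightChild, and regionValue follow this codebase's conventions; the sketch is illustrative, not the source implementation.

        //illustrative traversal: descend by split comparisons until a terminal node
        private float WalkTree(float[] features)
        {
            int node = 0;
            while (!this.tree[node].isTerminal)
            {
                node = (features[this.tree[node].split] <= this.tree[node].splitValue)
                    ? this.tree[node].leftChild
                    : this.tree[node].rightChild;
            }
            return this.tree[node].regionValue; //the fitted value of the region the point falls into
        }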