示例#1
0
文件: CARTTest.cs 项目: Dasmic/MLLib
        // Builds a CART model from the "Jason" training fixture and counts how
        // many validation rows the model predicts exactly; 9 hits are expected.
        public void CART_maketree_check_all_validation_data_set()
        {
            initData_Jason();
            BuildCART cart =
                new BuildCART();

            cart.SetMissingValue(999);
            cart.SetParameters(1);

            ModelCART mb =
                (ModelCART)cart.BuildModel(_trainingData,
                                           _attributeHeaders,
                                           _indexTargetAttribute);

            int count = 0;

            // _validationData is indexed [attribute][row]; attributes 0 and 1
            // are fed to the model and attribute 2 is compared to the output.
            for (int row = 0; row < _validationData[0].Length; row++)
            {
                double[] data  = new double[] { _validationData[0][row], _validationData[1][row] };
                double   value = mb.RunModelForSingleData(data);
                if (value == _validationData[2][row])
                {
                    count++;
                }
            }

            // Expected value first, actual second, so that a failure message
            // reports the two numbers the right way round.
            Assert.AreEqual(9, count);
        }
示例#2
0
        // Evaluates every tree in _allTrees on the given row, scales each
        // tree's output by the matching entry of _allStages, and returns the
        // sum of the scaled outputs.
        //
        // data: attribute values of a single sample; it is passed to
        //       VerifyDataForRun before any tree is evaluated.
        double RunModelForSingleData(double[] data)
        {
            VerifyDataForRun(data);

            double[] values = new double[_allTrees.Length];

            // Each iteration writes only its own slot of 'values', so no
            // locking is required while the trees are evaluated in parallel.
            Parallel.For(0, _allTrees.Length,
                         new ParallelOptions {
                MaxDegreeOfParallelism = _maxParallelThreads
            },
                         idx =>
            {
                ModelCART modelCart = _allTrees[idx];
                values[idx]         =
                    _allStages[idx] *
                    modelCart.RunModelForSingleData(data);
            });

            // Sum in index order after the parallel phase. Accumulating inside
            // the parallel loop made the addition order (and therefore the
            // floating-point rounding of the result) vary from run to run.
            double finalValue = 0;
            for (int idx = 0; idx < values.Length; idx++)
            {
                finalValue += values[idx];
            }

            return(finalValue);
        }
示例#3
0
 // Registers a tree together with its stage value.
 //
 // stage:   stage value associated with the tree
 // idxTree: slot of the tree, forwarded to the base AddTree
 // tree:    the CART model to register
 public void AddTree(double stage, long idxTree, ModelCART tree)
 {
     // The tree itself is stored by the base implementation.
     base.AddTree(idxTree, tree);

     // Stage values may repeat, so they cannot serve as Dictionary keys;
     // they are appended to _allStages instead.
     // NOTE(review): the stage is appended in call order, not placed at
     // idxTree - confirm that callers add trees in ascending index order.
     _allStages.Add(stage);
 }
示例#4
0
        // Builds a bagged decision-tree model: each of the _numberOfTrees
        // CART trees is trained on a bootstrap sample (random row indices
        // drawn with replacement) of the training data.
        //
        // trainingData:         data indexed [attribute][row]
        // attributeHeaders:     attribute names, passed through to VerifyData
        //                       and to the CART builder
        // indexTargetAttribute: index of the target attribute
        // Returns the populated ModelBaggedDecisionTree.
        public override Common.MLCore.ModelBase BuildModel(
            double[][] trainingData,
            string[] attributeHeaders,
            int indexTargetAttribute)
        {
            //Verify data and set variables
            VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

            ModelBaggedDecisionTree model =
                new ModelBaggedDecisionTree(_missingValue,
                                            _indexTargetAttribute,
                                            _trainingData.Length - 1,
                                            _numberOfTrees);

            //By default samples/tree is same as original samples
            if (_numberOfSamplesPerTree == long.MaxValue)
            {
                _numberOfSamplesPerTree = _trainingData[0].Length;
            }

            int numberOfRows = _trainingData[0].Length;

            // One shared generator hands out a distinct seed to every tree.
            // Creating an unseeded Random per parallel iteration can yield
            // identical time-based seeds, and thus identical samples, on
            // runtimes that seed from the clock.
            Random seedSource = new Random();

            //Split the data for each tree
            Parallel.For(0, _numberOfTrees, new ParallelOptions {
                MaxDegreeOfParallelism = _maxParallelThreads
            }, ii =>
            {
                int seed;
                // Random is not thread-safe, so guard the shared instance.
                lock (seedSource)
                {
                    seed = seedSource.Next();
                }
                Random rnd = new Random(seed);

                ConcurrentBag <long> trainingDataRowIndices =
                    new ConcurrentBag <long>();

                //Initialize the rows
                for (long idx = 0;
                     idx < _numberOfSamplesPerTree; idx++)
                {
                    //Random Sampling with Replacement.
                    //The upper bound of Random.Next is exclusive, so pass the
                    //row count itself; "count - 1" never selected the last row.
                    long rowIdx = rnd.Next(0, numberOfRows);

                    trainingDataRowIndices.Add(rowIdx);
                }

                BuildCART buildCart = new BuildCART();
                ModelCART modelCart = (ModelCART)buildCart.BuildModel(trainingData,
                                                                      attributeHeaders,
                                                                      indexTargetAttribute,
                                                                      trainingDataRowIndices);
                model.AddTree(ii, modelCart);
            }); //Number of trees

            return(model);
        }
示例#5
0
文件: CARTTest.cs 项目: Dasmic/MLLib
        // Builds a CART model from the "Jason" training fixture and checks the
        // attribute index and split value stored in the root node.
        public void CART_maketree_check_root_node_value()
        {
            initData_Jason();
            BuildCART cart =
                new BuildCART();

            cart.SetMissingValue(999);
            cart.SetParameters(1);

            ModelCART mb =
                (ModelCART)cart.BuildModel(_trainingData,
                                           _attributeHeaders,
                                           _indexTargetAttribute);

            // Expected value first, actual second, so that a failure message
            // reports the two values the right way round.
            Assert.AreEqual(0, mb.Root.AttributeIndex);
            Assert.AreEqual(6.642287351, mb.Root.Value);
        }
示例#6
0
文件: CARTTest.cs 项目: Dasmic/MLLib
        // Builds a CART model from the "no splitting possible" fixture and
        // checks that the prediction for training row 1 equals that row's
        // target value.
        public void CART_maketree_special_no_splitting_possible_true()
        {
            initData_special_no_splitting_possible();
            BuildCART cart =
                new BuildCART();
            ModelCART model =
                (ModelCART)cart.BuildModel(_trainingData,
                                           _attributeHeaders,
                                           _indexTargetAttribute);

            int row = 1;

            double[] data  = GetSingleTrainingRowDataForTest(row);
            double   value = model.RunModelForSingleData(data);

            // Expected value first, actual second, so that a failure message
            // reports the two values the right way round.
            Assert.AreEqual(_trainingData[_indexTargetAttribute][row],
                            value);
        }
示例#7
0
文件: CARTTest.cs 项目: Dasmic/MLLib
        // Builds a CART model from the "Jason" training fixture and checks
        // that the first training row is predicted as its own target value.
        public void CART_maketree_validate_single_training_data()
        {
            initData_Jason();
            BuildCART cart =
                new BuildCART();

            cart.SetMissingValue(999);
            cart.SetParameters(1);

            ModelCART mb =
                (ModelCART)cart.BuildModel(_trainingData,
                                           _attributeHeaders,
                                           _indexTargetAttribute);

            // Attributes 0 and 1 of row 0 are the inputs; attribute 2 is the
            // value the model output is compared against.
            double[] data  = new double[] { _trainingData[0][0], _trainingData[1][0] };
            double   value = mb.RunModelForSingleData(data);

            // Expected value first, actual second, so that a failure message
            // reports the two values the right way round.
            Assert.AreEqual(_trainingData[2][0], value);
        }
示例#8
0
        // Builds a random-forest model made of _numberOfTrees CART trees.
        // Every tree is trained on the full data passed in; the CART builder
        // is configured with the per-tree feature count through
        // SetParametersForRandomForest.
        //
        // trainingData:         data indexed [attribute][row]
        // attributeHeaders:     attribute names, passed through to VerifyData
        //                       and to the CART builder
        // indexTargetAttribute: index of the target attribute
        // Returns the populated ModelRandomForest.
        public override Common.MLCore.ModelBase BuildModel(
            double[][] trainingData,
            string[] attributeHeaders,
            int indexTargetAttribute)
        {
            // Validates the inputs and sets the builder's fields
            VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

            ModelRandomForest forest = new ModelRandomForest(
                _missingValue,
                _indexTargetAttribute,
                _trainingData.Length - 1,
                _numberOfTrees);

            // int.MaxValue marks "not configured": fall back to the ceiling of
            // the square root of the first dimension of _trainingData.
            if (_numberOfFeaturesPerTree == int.MaxValue)
            {
                double attributeCount = _trainingData.Length;
                double root = System.Math.Sqrt(attributeCount);
                _numberOfFeaturesPerTree = (int)System.Math.Ceiling(root);
            }

            ParallelOptions options = new ParallelOptions
            {
                MaxDegreeOfParallelism = _maxParallelThreads
            };

            // One independent CART builder per tree, run in parallel
            Parallel.For(0, _numberOfTrees, options, treeIdx =>
            {
                BuildCART treeBuilder = new BuildCART();
                treeBuilder.SetParametersForRandomForest(_numberOfFeaturesPerTree);

                ModelCART tree = (ModelCART)treeBuilder.BuildModel(
                    trainingData,
                    attributeHeaders,
                    indexTargetAttribute);

                forest.AddTree(treeIdx, tree);
            });

            return forest;
        }
示例#9
0
文件: ModelBase.cs 项目: Dasmic/MLLib
 // Stores a tree in the slot idxTree of _allTrees.
 //
 // idxTree: index into _allTrees at which the tree is stored
 // tree:    the CART model to store
 public virtual void AddTree(long idxTree,
                             ModelCART tree)
 {
     // Plain indexed write: any value already in the slot is replaced
     _allTrees[idxTree] = tree;
 }
示例#10
0
 // Stores a tree in the slot idxTree of _allTrees.
 //
 // idxTree: index into _allTrees at which the tree is stored
 // tree:    the CART model to store
 // index:   not used by this implementation
 public void AddTree(long idxTree, ModelCART tree, long[] index)
 {
     // Plain indexed write: any value already in the slot is replaced
     _allTrees[idxTree] = tree;
 }
示例#11
0
        // Builds an AdaBoost model of _numberOfTrees weighted CART trees.
        //
        // Each round trains a tree on the current sample weights, measures
        // its weighted misclassification rate, derives the tree's stage from
        // that rate, and then increases the weights of the misclassified
        // samples for the next round.
        //
        // trainingData:         data indexed [attribute][row]
        // attributeHeaders:     attribute names, passed through to VerifyData
        //                       and to the CART builder
        // indexTargetAttribute: index of the target attribute
        // Returns the populated ModelAdaBoost.
        public override Common.MLCore.ModelBase BuildModel(
            double[][] trainingData,
            string[] attributeHeaders,
            int indexTargetAttribute)
        {
            //Verify data and set variables
            VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

            double[] weight = new double[trainingData[0].Length];
            int[]    error  = new int[trainingData[0].Length];

            //Initialize weights to 1/N
            double initialWeight = 1.0 / (double)weight.Length;
            for (int idx = 0; idx < weight.Length; idx++)
            {
                weight[idx] = initialWeight;
            }

            ModelAdaBoost model =
                new ModelAdaBoost(_missingValue,
                                  _indexTargetAttribute,
                                  _trainingData.Length - 1,
                                  _numberOfTrees);

            // Boosting rounds must run one after another: every round reads
            // the weights written by the previous round. Running them in
            // parallel raced on 'weight' and 'error' and added stages to the
            // model in completion order instead of tree order.
            for (int ii = 0; ii < _numberOfTrees; ii++)
            {
                BuildCARTBoost buildCart = new BuildCARTBoost();
                ModelCART modelCart      = (ModelCART)buildCart.BuildModel(trainingData,
                                                                           attributeHeaders,
                                                                           indexTargetAttribute,
                                                                           weight);
                double sumWeights = 0, sumWeightedError = 0;

                //Compute Error and Sum of Weights
                for (int rowIdx = 0;
                     rowIdx < _trainingData[0].Length;
                     rowIdx++)
                {
                    double[] data = GetLinearArray(_trainingData, rowIdx, _trainingData.Length - 2);
                    double prediction = modelCart.RunModelForSingleData(data);

                    // error is 0 for a correct prediction, 1 otherwise
                    if (SupportFunctions.DoubleCompare(prediction,
                                                       _trainingData[_indexTargetAttribute][rowIdx]))
                    {
                        error[rowIdx] = 0;
                    }
                    else
                    {
                        error[rowIdx] = 1;
                    }

                    //Compute data for mis-classification rate
                    //This has to be done after error computation
                    sumWeights       += weight[rowIdx];
                    sumWeightedError += weight[rowIdx] * error[rowIdx];
                }

                // _epsilon is added to numerator and denominator so the rate
                // is never exactly 0, which keeps the ratio below finite
                double misClassificationRate = (sumWeightedError + _epsilon) / (sumWeights + _epsilon);

                //Diagnostics Messages
                System.Diagnostics.Debug.WriteLine("\n\nSumWeights:"
                                                   + sumWeights.ToString());
                System.Diagnostics.Debug.WriteLine("Sum Weighted Error:"
                                                   + sumWeightedError.ToString());
                System.Diagnostics.Debug.WriteLine("MisClassification Rate:"
                                                   + misClassificationRate.ToString());

                //Compute Stage
                double stage = System.Math.Log(
                    (1.0 - misClassificationRate)
                    / misClassificationRate);

                System.Diagnostics.Debug.WriteLine("Stage:"
                                                   + stage.ToString());

                model.AddTree(stage, ii, modelCart);

                //Update the Weights: only misclassified rows (error == 1)
                //are scaled by e^stage, correct rows keep their weight
                for (int rowIdx = 0;
                     rowIdx < _trainingData[0].Length;
                     rowIdx++)
                {
                    weight[rowIdx] = weight[rowIdx] *
                                     System.Math.Pow(System.Math.E,
                                                     stage * error[rowIdx]);
                }
            } //Number of trees

            return(model);
        }