/// <summary>
/// Builds a single-split CART model on the Jason dataset and verifies it
/// classifies the expected number (9) of validation rows correctly.
/// </summary>
public void CART_maketree_check_all_validation_data_set()
{
    initData_Jason();

    BuildCART cart = new BuildCART();
    cart.SetMissingValue(999);
    cart.SetParameters(1);
    ModelCART mb = (ModelCART)cart.BuildModel(_trainingData, _attributeHeaders, _indexTargetAttribute);

    int count = 0;
    for (int row = 0; row < _validationData[0].Length; row++)
    {
        double[] data = new double[] { _validationData[0][row], _validationData[1][row] };
        double value = mb.RunModelForSingleData(data);
        if (value == _validationData[2][row])
        {
            count++;
        }
    }

    // Assert.AreEqual takes (expected, actual) — expected value goes first so
    // a failure message reads correctly.
    Assert.AreEqual(9, count);
}
/// <summary>
/// Runs every boosted tree against a single data row and returns the
/// stage-weighted sum of the individual tree outputs.
/// </summary>
/// <param name="data">One row of attribute values (target column excluded).</param>
/// <returns>Sum over all trees of _allStages[i] * tree[i] output.</returns>
double RunModelForSingleData(double[] data)
{
    VerifyDataForRun(data);

    double[] values = new double[_allTrees.Length];

    // Evaluate all trees in parallel. Each iteration writes only its own
    // array slot, so no locking is needed inside the loop.
    Parallel.For(0, _allTrees.Length,
        new ParallelOptions { MaxDegreeOfParallelism = _maxParallelThreads },
        idx =>
        {
            ModelCART modelCart = _allTrees[idx];
            values[idx] = _allStages[idx] * modelCart.RunModelForSingleData(data);
        });

    // Sum sequentially after the parallel phase: removes the per-iteration
    // lock contention of the old code and makes the floating-point
    // accumulation order deterministic across runs.
    double finalValue = 0;
    for (int idx = 0; idx < values.Length; idx++)
    {
        finalValue += values[idx];
    }

    return finalValue;
}
/// <summary>
/// Stores a boosted tree and its stage (alpha) weight at the given ensemble
/// index. Safe for concurrent callers using distinct indices.
/// </summary>
/// <param name="stage">Stage weight for the tree; stages can repeat, which is
/// why a Dictionary keyed by stage cannot be used.</param>
/// <param name="idxTree">Zero-based slot of the tree in the ensemble.</param>
/// <param name="tree">The trained tree.</param>
public void AddTree(double stage, long idxTree, ModelCART tree)
{
    base.AddTree(idxTree, tree);

    // _allStages must stay index-aligned with _allTrees: when the model runs,
    // stage i multiplies the output of tree i. A plain Add() appends in call
    // order, which breaks that pairing if trees are added out of order (e.g.
    // from a parallel build loop). Grow the list as needed and write the
    // stage at the tree's own index; the lock makes the List mutation safe
    // under concurrent callers.
    lock (_allStages)
    {
        while (_allStages.Count <= idxTree)
        {
            _allStages.Add(0);
        }
        _allStages[(int)idxTree] = stage;
    }
}
/// <summary>
/// Builds a bagged decision-tree ensemble: each tree is trained on a
/// bootstrap sample (random sampling with replacement) of the training rows.
/// </summary>
/// <param name="trainingData">Column-major training data; one inner array per attribute.</param>
/// <param name="attributeHeaders">Attribute names, parallel to trainingData.</param>
/// <param name="indexTargetAttribute">Index of the target attribute column.</param>
/// <returns>The trained ModelBaggedDecisionTree.</returns>
public override Common.MLCore.ModelBase BuildModel(
    double[][] trainingData,
    string[] attributeHeaders,
    int indexTargetAttribute)
{
    // Verify data and set member variables (_trainingData, _missingValue, ...).
    VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

    ModelBaggedDecisionTree model = new ModelBaggedDecisionTree(
        _missingValue,
        _indexTargetAttribute,
        _trainingData.Length - 1,
        _numberOfTrees);

    // By default the sample count per tree equals the original sample count.
    if (_numberOfSamplesPerTree == long.MaxValue)
    {
        _numberOfSamplesPerTree = _trainingData[0].Length;
    }

    // Build each tree on its own bootstrap sample, in parallel.
    Parallel.For(0, _numberOfTrees,
        new ParallelOptions { MaxDegreeOfParallelism = _maxParallelThreads },
        ii =>
        {
            // Seed each tree's RNG explicitly: time-seeded `new Random()`
            // instances created close together in parallel iterations can
            // share a seed and produce identical bootstrap samples.
            Random rnd = new Random(Guid.NewGuid().GetHashCode());

            ConcurrentBag<long> trainingDataRowIndices = new ConcurrentBag<long>();
            for (long idx = 0; idx < _numberOfSamplesPerTree; idx++)
            {
                // Random sampling with replacement. Random.Next's upper bound
                // is EXCLUSIVE, so pass Length (not Length - 1) — otherwise
                // the last training row could never be sampled.
                long rowIdx = rnd.Next(0, _trainingData[0].Length);
                trainingDataRowIndices.Add(rowIdx);
            }

            BuildCART buildCart = new BuildCART();
            ModelCART modelCart = (ModelCART)buildCart.BuildModel(
                trainingData,
                attributeHeaders,
                indexTargetAttribute,
                trainingDataRowIndices);
            model.AddTree(ii, modelCart);
        });

    return model;
}
/// <summary>
/// Checks that the root split of the CART model built on the Jason dataset
/// uses attribute 0 with the expected split value.
/// </summary>
public void CART_maketree_check_root_node_value()
{
    initData_Jason();

    BuildCART cart = new BuildCART();
    cart.SetMissingValue(999);
    cart.SetParameters(1);
    ModelCART mb = (ModelCART)cart.BuildModel(_trainingData, _attributeHeaders, _indexTargetAttribute);

    // Assert.AreEqual takes (expected, actual); compare the double split
    // value with a tolerance rather than exact equality.
    Assert.AreEqual(0, mb.Root.AttributeIndex);
    Assert.AreEqual(6.642287351, mb.Root.Value, 1e-9);
}
/// <summary>
/// Builds a CART model on a dataset where no split is possible and verifies
/// a training row still maps to its own target value.
/// </summary>
public void CART_maketree_special_no_splitting_possible_true()
{
    initData_special_no_splitting_possible();

    BuildCART cart = new BuildCART();
    ModelCART model = (ModelCART)cart.BuildModel(_trainingData, _attributeHeaders, _indexTargetAttribute);

    int row = 1;
    double[] data = GetSingleTrainingRowDataForTest(row);
    double value = model.RunModelForSingleData(data);

    // Assert.AreEqual takes (expected, actual) — expected value goes first.
    Assert.AreEqual(_trainingData[_indexTargetAttribute][row], value);
}
/// <summary>
/// Builds a single-split CART model on the Jason dataset and verifies the
/// first training row is predicted back to its own target value.
/// </summary>
public void CART_maketree_validate_single_training_data()
{
    initData_Jason();

    BuildCART cart = new BuildCART();
    cart.SetMissingValue(999);
    cart.SetParameters(1);
    ModelCART mb = (ModelCART)cart.BuildModel(_trainingData, _attributeHeaders, _indexTargetAttribute);

    double[] data = new double[] { _trainingData[0][0], _trainingData[1][0] };
    double value = mb.RunModelForSingleData(data);

    // Assert.AreEqual takes (expected, actual) — expected value goes first.
    Assert.AreEqual(_trainingData[2][0], value);
}
/// <summary>
/// Builds a random-forest ensemble: every tree is trained on the full data
/// set but restricted to a random subset of the features.
/// </summary>
/// <param name="trainingData">Column-major training data; one inner array per attribute.</param>
/// <param name="attributeHeaders">Attribute names, parallel to trainingData.</param>
/// <param name="indexTargetAttribute">Index of the target attribute column.</param>
/// <returns>The trained ModelRandomForest.</returns>
public override Common.MLCore.ModelBase BuildModel(
    double[][] trainingData,
    string[] attributeHeaders,
    int indexTargetAttribute)
{
    // Validate the input and populate the member variables used below.
    VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

    ModelRandomForest model = new ModelRandomForest(
        _missingValue,
        _indexTargetAttribute,
        _trainingData.Length - 1,
        _numberOfTrees);

    // Default feature count per tree: ceil(sqrt(total attribute count)).
    if (_numberOfFeaturesPerTree == int.MaxValue)
    {
        _numberOfFeaturesPerTree = (int)System.Math.Ceiling(
            System.Math.Sqrt((double)_trainingData.Length));
    }

    // Train the trees concurrently; each iteration only touches its own
    // builder and stores its tree at its own index via AddTree.
    Parallel.For(0, _numberOfTrees,
        new ParallelOptions { MaxDegreeOfParallelism = _maxParallelThreads },
        treeIdx =>
        {
            BuildCART treeBuilder = new BuildCART();
            treeBuilder.SetParametersForRandomForest(_numberOfFeaturesPerTree);
            ModelCART tree = (ModelCART)treeBuilder.BuildModel(
                trainingData, attributeHeaders, indexTargetAttribute);
            model.AddTree(treeIdx, tree);
        });

    return model;
}
/// <summary>
/// Stores a trained CART tree at the given slot in the ensemble's tree array.
/// Array element assignment means concurrent callers are safe as long as each
/// uses a distinct index.
/// </summary>
/// <param name="idxTree">Zero-based slot for the tree in _allTrees.</param>
/// <param name="tree">The trained tree to store.</param>
public virtual void AddTree(long idxTree, ModelCART tree) { _allTrees[idxTree] = tree; }
/// <summary>
/// Stores a trained CART tree at the given slot in the ensemble's tree array.
/// </summary>
/// <param name="idxTree">Zero-based slot for the tree in _allTrees.</param>
/// <param name="tree">The trained tree to store.</param>
/// <param name="index">NOTE(review): currently unused — the sampled row
/// indices are ignored by this overload. Confirm whether they should be
/// recorded (e.g. for out-of-bag estimation) or the overload retired.</param>
public void AddTree(long idxTree, ModelCART tree, long[] index) { _allTrees[idxTree] = tree; }
/// <summary>
/// Builds an AdaBoost ensemble of CART trees. Boosting is inherently
/// sequential: tree i+1 must be trained on the sample weights produced by
/// tree i, so the round loop runs sequentially — the previous Parallel.For
/// version raced on the shared weight/error arrays (and on the captured
/// outer `value` local), producing incorrect stages.
/// </summary>
/// <param name="trainingData">Column-major training data; one inner array per attribute.</param>
/// <param name="attributeHeaders">Attribute names, parallel to trainingData.</param>
/// <param name="indexTargetAttribute">Index of the target attribute column.</param>
/// <returns>The trained ModelAdaBoost.</returns>
public override Common.MLCore.ModelBase BuildModel(
    double[][] trainingData,
    string[] attributeHeaders,
    int indexTargetAttribute)
{
    // Verify data and set member variables.
    VerifyData(trainingData, attributeHeaders, indexTargetAttribute);

    double[] weight = new double[trainingData[0].Length];
    int[] error = new int[trainingData[0].Length];

    // Initialize every sample weight to 1/N.
    double initialWeight = 1.0 / (double)weight.Length;
    for (int idx = 0; idx < weight.Length; idx++)
    {
        weight[idx] = initialWeight;
    }

    ModelAdaBoost model = new ModelAdaBoost(_missingValue, _indexTargetAttribute,
        _trainingData.Length - 1, _numberOfTrees);

    // Sequential boosting rounds: train a weak learner on the current
    // weights, score it, then re-weight the samples for the next round.
    for (int ii = 0; ii < _numberOfTrees; ii++)
    {
        BuildCARTBoost buildCart = new BuildCARTBoost();
        ModelCART modelCart = (ModelCART)buildCart.BuildModel(trainingData,
            attributeHeaders, indexTargetAttribute, weight);

        // Compute the per-sample error flags and the weighted error rate.
        double sumWeights = 0, sumWeightedError = 0;
        for (int rowIdx = 0; rowIdx < _trainingData[0].Length; rowIdx++)
        {
            double[] data = GetLinearArray(_trainingData, rowIdx, _trainingData.Length - 2);
            double value = modelCart.RunModelForSingleData(data);
            error[rowIdx] = SupportFunctions.DoubleCompare(
                value, _trainingData[_indexTargetAttribute][rowIdx]) ? 0 : 1;

            // Mis-classification sums must use the error flag set just above.
            sumWeights += weight[rowIdx];
            sumWeightedError += weight[rowIdx] * error[rowIdx];
        }

        // Epsilon guards against division by zero for a perfect classifier.
        double misClassificationRate = (sumWeightedError + _epsilon) / (sumWeights + _epsilon);

        System.Diagnostics.Debug.WriteLine("\n\nSumWeights:" + sumWeights.ToString());
        System.Diagnostics.Debug.WriteLine("Sum Weighted Error:" + sumWeightedError.ToString());
        System.Diagnostics.Debug.WriteLine("MisClassification Rate:" +
            misClassificationRate.ToString());

        // Stage (alpha) = ln((1 - err) / err); larger for more accurate trees.
        double stage = System.Math.Log(
            (1.0 - misClassificationRate) / misClassificationRate);
        System.Diagnostics.Debug.WriteLine("Stage:" + stage.ToString());

        model.AddTree(stage, ii, modelCart);

        // Increase the weight of misclassified samples for the next round.
        for (int rowIdx = 0; rowIdx < _trainingData[0].Length; rowIdx++)
        {
            weight[rowIdx] *= System.Math.Exp(stage * error[rowIdx]);
        }
    }

    return model;
}