//Recursively builds the decision sub-tree rooted at a new child of dtnParent.
//Kept as an instance (virtual) method so recursion depth is bounded by tree depth
//("so that stack is not full" per the original author's note).
//
//  trainingDataRowIndices - indices of the training rows visible to this node
//  value                  - parent-split attribute value that leads to this node
//  dtnParent              - parent node (null for the root)
//
//Returns the fully built node; a leaf when a stopping criterion is met.
protected virtual DecisionTreeNode buildChildNodes(ConcurrentBag<long> trainingDataRowIndices, double value, DecisionTreeNode dtnParent)
{
    DecisionTreeNode dtn = new DecisionTreeNode(dtnParent);

    //Get all rows in the training data that reach this node
    FilteredData fd = getFilteredDataForNode(dtn, value, trainingDataRowIndices);

    //Stopping criteria: too few rows, or all target values identical
    //(their entropy will be 0) - make this node a leaf.
    if (fd.NumberOfRows <= _minimumNumberPerNode || isTargetDataSame(fd.FilteredDataValues)) //Attributes is empty
    {
        setAsTargetAttributeNode(fd.FilteredDataValues, dtn);
        return (dtn); //No more children if attributeIndex is target Attributes
    }

    //Set data for current node: choose the best attribute to split on
    SplittedAttributeData ed = splitDataOnUnivariateCriterion(fd.FilteredDataValues);

    //Check for positive and negative examples
    dtn.setAttributeValues(ed.AttributeIndex, _attributeHeaders[ed.AttributeIndex]);

    ConcurrentBag<long> newTrainingDataRowIndices = fd.TrainingDataRowIndices;
    fd = null; //Free Memory -> Clean up data, no longer needed

    //Recurse for every observed value of the chosen attribute
    foreach (double key in ed.Freqs.Keys)
    {
        //Fix: was a hard-coded 999; use the shared _missingValue sentinel so the
        //missing-value marker stays consistent with the binary-split overload
        //(which already compares against _missingValue).
        if (key != _missingValue) //Dont add for missing values
        {
            dtn.addChild(key, buildChildNodes(newTrainingDataRowIndices, key, dtn)); //Key has value
        }
    }
    return (dtn);
}
//Builds one node of a binary (less-than / greater-or-equal) decision tree.
//Kept as an instance method so recursion depth is bounded by tree depth
//("so that stack is not full" per the original author's note).
//
//  trainingDataRowIndices - indices of the training rows visible to this node
//  value                  - parent-split value that routed rows to this node
//  dtnParent              - parent node (null for the root)
//  isLessThan             - true for the left (<) side of the parent split
//
//Returns the built node; a leaf when any stopping criterion is met.
protected DecisionTreeNode BuildChildNodes(ConcurrentBag<long> trainingDataRowIndices, double value, DecisionTreeNode dtnParent, bool isLessThan)
{
    var node = new DecisionTreeNode(dtnParent);

    //Rows of the training data that satisfy this side of the parent's split
    FilteredData filtered = GetFilteredDataForNode(node, value, trainingDataRowIndices, isLessThan);

    //Stopping criteria:
    //  - minimum number of rows reached
    //  - all target values identical
    //  - no split happened (child sees exactly the rows the parent saw)
    //  - subclass-supplied additional condition
    bool noSplitHappened = dtnParent != null && filtered.TrainingDataRowIndices.Count == trainingDataRowIndices.Count;
    if (filtered.NumberOfRows <= _minimumNumberPerNode
        || isTargetDataSame(filtered.FilteredDataValues)
        || noSplitHappened
        || GetAdditionalStoppingCondition(node))
    {
        if (filtered.NumberOfRows == 0)
        {
            //Special case: nothing matched - fall back to the incoming rows for the leaf
            filtered = convertRowIndicesToFilteredData(trainingDataRowIndices);
        }
        setAsTargetAttributeNode(filtered.FilteredDataValues, node);
        return (node); //No more children once a stopping criterion is hit
    }

    //Pick the attribute/value pair that best splits the remaining rows
    SplittedAttributeData split = splitDataOnUnivariateCriterion(filtered.FilteredDataValues);
    node.setAttributeValues(split.AttributeIndex, _attributeHeaders[split.AttributeIndex]);

    //Remember the value in column split.AttributeIndex on which the split was made
    node.Value = split.SplittingCriteriaValue;

    ConcurrentBag<long> childRowIndices = filtered.TrainingDataRowIndices;
    filtered = null; //Free memory -> filtered data no longer needed

    if (split.SplittingCriteriaValue != _missingValue) //Dont add for missing values
    {
        //Child keys: 0 = left, 1 = right; each node has at most two children,
        //so the keys never conflict.
        //NOTE(review): the recursive child construction below is commented out in
        //the original source, so this method currently returns childless interior
        //nodes - confirm whether that is intentional before re-enabling:
        //DecisionTreeNode dtnChild = node.addChild(0, BuildChildNodes(childRowIndices, split.SplittingCriteriaValue, node, true));
        //dtnChild = node.addChild(1, BuildChildNodes(childRowIndices, split.SplittingCriteriaValue, node, false));
    }
    return (node);
}