Example #1
protected void setAsTargetAttributeNode(double[][] filteredValues,
                                        DecisionTreeNode dtn)
{
    /*if (filteredValues[0].Length < 1)
     * {
     *  dtn.setAttributeValues(-1, ""); //Denotes an invalid node
     * }
     * else*/
    {
        //Mark this node as a leaf on the target attribute and store the most
        //frequent target value in the filtered data as its prediction
        dtn.setAttributeValues(_indexTargetAttribute,
                               _attributeHeaders[_indexTargetAttribute]);
        dtn.Value = getMostFrequentValueForIndex(filteredValues, _indexTargetAttribute);
    }
}
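
For reference, a minimal sketch of what a majority-vote helper like getMostFrequentValueForIndex could look like. The row-major double[row][column] layout and the helper body are assumptions for illustration, not taken from the original source:

using System.Collections.Generic;

static class LeafValueSketch
{
    //Hypothetical helper: returns the most frequent value in the given column,
    //assuming filteredValues is laid out as double[row][column]
    public static double MostFrequentValueForIndex(double[][] filteredValues, int columnIndex)
    {
        var counts = new Dictionary<double, int>();
        foreach (double[] row in filteredValues)
        {
            double v = row[columnIndex];
            counts[v] = counts.TryGetValue(v, out int c) ? c + 1 : 1;
        }

        double mostFrequent = double.NaN;
        int bestCount = -1;
        foreach (KeyValuePair<double, int> kv in counts)
        {
            if (kv.Value > bestCount)
            {
                mostFrequent = kv.Key;
                bestCount = kv.Value;
            }
        }
        return mostFrequent;
    }
}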
Example #2
        //Recursive builder; locals could be moved to private members to reduce stack usage on deep trees
        protected virtual DecisionTreeNode buildChildNodes(ConcurrentBag<long> trainingDataRowIndices,
                                                           double value,
                                                           DecisionTreeNode dtnParent)
        {
            DecisionTreeNode dtn = new DecisionTreeNode(dtnParent);

            //Get all rows in Training Data
            FilteredData fd = getFilteredDataForNode(dtn, value,
                                                     trainingDataRowIndices);

            //Stopping criterion: stop when too few rows remain or all
            //target values are identical (entropy is 0)
            if (fd.NumberOfRows <= _minimumNumberPerNode || isTargetDataSame(fd.FilteredDataValues))
            {
                setAsTargetAttributeNode(fd.FilteredDataValues, dtn);
                return dtn; //Leaf node: no more children once the target attribute is set
            }

            //Set data for current node
            SplittedAttributeData ed =
                splitDataOnUnivariateCriterion(fd.FilteredDataValues);

            //Record the splitting attribute chosen for this node
            dtn.setAttributeValues(ed.AttributeIndex,
                                   _attributeHeaders[ed.AttributeIndex]);

            ConcurrentBag<long> newTrainingDataRowIndices =
                fd.TrainingDataRowIndices;

            fd = null;  //Free memory: the filtered data is no longer needed

            //Recurse once per observed value of the chosen attribute
            foreach (double key in ed.Freqs.Keys)
            {
                if (key != 999) //Don't add a child for the missing-value sentinel (999)
                {
                    dtn.addChild(key, buildChildNodes(newTrainingDataRowIndices,
                                                      key,
                                                      dtn));
                }
            }
            return dtn;
        }
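
The recursion above stops when the remaining subset is pure. A minimal sketch of a purity check in the spirit of isTargetDataSame, assuming a double[row][column] layout and a known target column index (both assumptions for illustration):

static class PuritySketch
{
    //Hypothetical helper: true when every row has the same target value,
    //i.e. the subset is pure and its entropy is 0
    public static bool AllTargetValuesEqual(double[][] filteredValues, int targetColumnIndex)
    {
        if (filteredValues.Length == 0)
        {
            return true; //An empty subset is trivially pure
        }

        double first = filteredValues[0][targetColumnIndex];
        foreach (double[] row in filteredValues)
        {
            if (row[targetColumnIndex] != first)
            {
                return false;
            }
        }
        return true;
    }
}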
Example #3
        //Recursive builder; locals could be moved to private members to reduce stack usage on deep trees
        protected DecisionTreeNode BuildChildNodes(ConcurrentBag<long> trainingDataRowIndices,
                                                   double value,
                                                   DecisionTreeNode dtnParent,
                                                   bool isLessThan)
        {
            DecisionTreeNode dtn = new DecisionTreeNode(dtnParent);

            //Get all rows in Training Data
            FilteredData fd = GetFilteredDataForNode(dtn, value,
                                                     trainingDataRowIndices,
                                                     isLessThan);

            //Stopping criterion: stop when the minimum number of rows is reached,
            //all target values are identical, the split did not reduce the data
            //(no split happened), or an additional user-defined condition holds
            if (fd.NumberOfRows <= _minimumNumberPerNode ||
                isTargetDataSame(fd.FilteredDataValues) ||
                (dtnParent != null && fd.TrainingDataRowIndices.Count == trainingDataRowIndices.Count) ||
                GetAdditionalStoppingCondition(dtn))
            {
                if (fd.NumberOfRows == 0)     //Special case: fall back to the original data for this node
                {
                    fd = convertRowIndicesToFilteredData(trainingDataRowIndices);
                }
                setAsTargetAttributeNode(fd.FilteredDataValues, dtn);
                return dtn;   //Leaf node: no more children once a stopping condition is met
            }

            //Set data for current node
            SplittedAttributeData ed =
                splitDataOnUnivariateCriterion(fd.FilteredDataValues);

            //Record the splitting attribute chosen for this node
            dtn.setAttributeValues(ed.AttributeIndex,
                                   _attributeHeaders[ed.AttributeIndex]);

            //Store the value of column ed.AttributeIndex on which the split was made
            dtn.Value = ed.SplittingCriteriaValue;

            ConcurrentBag<long> newTrainingDataRowIndices =
                fd.TrainingDataRowIndices;

            fd = null;  //Free memory: the filtered data is no longer needed

            //Add the two children of the binary split
            if (ed.SplittingCriteriaValue != _missingValue) //Don't split on the missing-value sentinel
            {
                //0 is for left (values less than the split value), 1 is for right.
                //There won't be any conflict since each node has only 2 children
                dtn.addChild(0, BuildChildNodes(newTrainingDataRowIndices,
                                                ed.SplittingCriteriaValue,
                                                dtn, true));

                dtn.addChild(1, BuildChildNodes(newTrainingDataRowIndices,
                                                ed.SplittingCriteriaValue,
                                                dtn, false));
            }

            return dtn;
        }
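
Since child 0 is built with isLessThan = true and child 1 with isLessThan = false, prediction amounts to comparing a row's value against the stored split value at each node. A minimal traversal sketch, using a hypothetical node shape rather than the real DecisionTreeNode API:

//Hypothetical node shape used only for this sketch; the real DecisionTreeNode
//stores the same information (attribute index, split value, two children)
class BinaryNodeSketch
{
    public int AttributeIndex;          //column the node splits on
    public double Value;                //split value (at a leaf: the predicted target value)
    public BinaryNodeSketch Left;       //child added under key 0 (isLessThan = true)
    public BinaryNodeSketch Right;      //child added under key 1 (isLessThan = false)

    public bool IsLeaf => Left == null && Right == null;
}

static class PredictSketch
{
    //Walks the binary tree built by BuildChildNodes: go left when the row's
    //value is less than the split value, otherwise go right
    public static double Predict(BinaryNodeSketch node, double[] row)
    {
        while (!node.IsLeaf)
        {
            node = row[node.AttributeIndex] < node.Value ? node.Left : node.Right;
        }
        return node.Value;
    }
}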