Exemplo n.º 1
0
        /// <summary>
        /// Finds the partition index of <paramref name="trainingSet"/> (ordered by
        /// <paramref name="attrName"/>) that getEntropy scores lowest.
        /// </summary>
        /// <remarks>
        /// The list is sorted in place with an ExampleComparator for the attribute, so the caller's
        /// list order changes. Only indices where two neighbouring examples have different results
        /// are scored. Despite the setter name, the value stored through setGain is the lowest
        /// entropy found, not an information gain. When no neighbouring pair differs, that value
        /// stays at the sentinel 0xffffff and the partition index is never set.
        /// </remarks>
        /// <param name="trainingSet">Examples to scan; reordered by this call.</param>
        /// <param name="attrName">Attribute used for ordering and passed through to getEntropy.</param>
        /// <param name="classNames">Class labels passed through to getEntropy.</param>
        /// <returns>A new GainResult holding the lowest entropy and the index that produced it.</returns>
        public static GainResult getGain(List <Example> trainingSet, String attrName, List <String> classNames)
        {
            trainingSet.Sort(new ExampleComparator(attrName));

            GainResult best          = new GainResult();
            double     lowestEntropy = 0xffffff;

            for (int cut = 0; cut + 1 < trainingSet.Count; cut++)
            {
                // A cut between two examples with the same result is never scored.
                if (trainingSet[cut].getResult().Equals(trainingSet[cut + 1].getResult()))
                {
                    continue;
                }

                double entropyAtCut = getEntropy(trainingSet, classNames, attrName, cut);
                if (entropyAtCut < lowestEntropy)
                {
                    lowestEntropy = entropyAtCut;
                    best.setPartitionIndex(cut);
                }
            }

            best.setGain(lowestEntropy);
            return best;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Recursively grows the decision tree below <paramref name="root"/>.
        /// </summary>
        /// <remarks>
        /// Cases, checked in this order:
        /// 1. isTrivial(root) is true: the node gets the result of its first training example.
        /// 2. No attribute names remain: the node gets the most frequent class of its training set.
        /// 3. Otherwise the node is split in two on the attribute with the largest gain and both
        ///    children are built recursively.
        ///
        /// Side effects in case 3: the chosen attribute is removed from root.getAttrNames(), the
        /// list returned by root.getTrainingSet() is sorted (several times), and two entries are
        /// added to root.getChildren().
        /// </remarks>
        /// <param name="root">Node to expand.</param>
        public static void buildTree(DTNode root)
        {
            if (isTrivial(root))
            {
                root.setResult(root.getTrainingSet()[0].getResult());
                return;
            }

            if (root.getAttrNames().Count == 0)
            {
                root.setResult(mostFrequentClass(root.getTrainingSet(), root.getClasses()));
                return;
            }

            // Find the attribute with maximum gain. If no attribute scores above 0.0 the first
            // attribute is used together with whatever partition index a fresh GainResult reports.
            GainResult gainResultMax = new GainResult();
            gainResultMax.setGain(0.0);
            int resultIndex = 0;
            for (int i = 0; i < root.getAttrNames().Count; i++)
            {
                String     attrName  = root.getAttrNames()[i];
                GainResult candidate = getGain(root.getTrainingSet(), attrName, root.getClasses());
                // getGain stores the lowest partition entropy in the gain field; turn it into an
                // actual gain by subtracting it from getEntropy(..., -1).
                // NOTE(review): index -1 presumably means "no partition" - confirm against getEntropy.
                candidate.setGain(getEntropy(root.getTrainingSet(), root.getClasses(), attrName, -1) -
                                  candidate.getGain());
                if (gainResultMax.getGain() < candidate.getGain())
                {
                    gainResultMax.setGain(candidate.getGain());
                    gainResultMax.setPartitionIndex(candidate.getPartitionIndex());
                    resultIndex = i;
                }
            }

            String attributeNode = root.getAttrNames()[resultIndex];
            root.getAttrNames().RemoveAt(resultIndex);

            // getGain left the list ordered by the last attribute tried; restore the order of the
            // winning attribute so the partition index is meaningful again.
            root.getTrainingSet().Sort(new ExampleComparator(attributeNode));

            List <Example> sortedExamples = root.getTrainingSet();
            int            partitionIndex = gainResultMax.getPartitionIndex();

            // The split threshold is the midpoint between the two examples around the cut; it is
            // the same for both children, so compute it once.
            double leftValue  = sortedExamples[partitionIndex].getValue(attributeNode);
            double rightValue = sortedExamples[partitionIndex + 1].getValue(attributeNode);
            double threshold  = (leftValue + rightValue) / 2;

            // Lower child: examples [0 .. partitionIndex], condition "attribute LT threshold".
            attachChild(root, copyExamples(sortedExamples, 0, partitionIndex), attributeNode, threshold, true);
            // Upper child: examples [partitionIndex + 1 .. end], condition "attribute GEQT threshold".
            attachChild(root, copyExamples(sortedExamples, partitionIndex + 1, sortedExamples.Count - 1),
                        attributeNode, threshold, false);
        }

        // Returns the class name from classNames that occurs most often as a result in trainingSet.
        // Results that are not listed in classNames are ignored; "" is returned when nothing is counted.
        private static String mostFrequentClass(List <Example> trainingSet, List <String> classNames)
        {
            Dictionary <String, int> classCount = new Dictionary <String, int>();
            foreach (String className in classNames)
            {
                classCount.Add(className, 0);
            }

            foreach (Example example in trainingSet)
            {
                int seen;
                if (classCount.TryGetValue(example.getResult(), out seen))
                {
                    classCount[example.getResult()] = seen + 1;
                }
            }

            int    maxCount    = 0;
            String classResult = "";
            foreach (KeyValuePair <String, int> entry in classCount)
            {
                // Strict comparison: on a tie the class enumerated first wins.
                if (maxCount < entry.Value)
                {
                    maxCount    = entry.Value;
                    classResult = entry.Key;
                }
            }
            return classResult;
        }

        // Returns fresh Example objects (result plus a new attribute dictionary) for source[first..last], inclusive.
        private static List <Example> copyExamples(List <Example> source, int first, int last)
        {
            List <Example> copies = new List <Example>();
            for (int p = first; p <= last; p++)
            {
                Example example    = source[p];
                Example newExample = new Example();
                newExample.setResult(example.getResult());

                Dictionary <String, Double> newAttributes = new Dictionary <String, Double>();
                foreach (KeyValuePair <String, Double> entry in example.getAttributes())
                {
                    newAttributes.Add(entry.Key, entry.Value);
                }
                newExample.setAttributes(newAttributes);
                copies.Add(newExample);
            }
            return copies;
        }

        // Creates a child of parent holding the given examples, registers it under the split condition
        // (LT for the lower branch, GEQT otherwise) and recursively builds the child's subtree.
        private static void attachChild(DTNode parent, List <Example> examples, String attributeNode,
                                        double threshold, bool lowerBranch)
        {
            // The child receives its own copies of the attribute and class name lists.
            DTNode child = new DTNode(new List <Example>(),
                                      new List <String>(parent.getAttrNames()),
                                      new List <String>(parent.getClasses()));

            Condition condition = new Condition();
            if (lowerBranch)
            {
                condition.setOperator(Constants.conditionType.LT);
            }
            else
            {
                condition.setOperator(Constants.conditionType.GEQT);
            }
            condition.setMember1(attributeNode);
            condition.setMember2(threshold);

            child.setTrainingSet(examples);
            parent.getChildren().Add(child, condition);
            buildTree(child);
        }
Exemplo n.º 3
0
     /// <summary>
     /// Recursively grows the decision tree below <paramref name="root"/>.
     /// </summary>
     /// <remarks>
     /// Cases, checked in this order:
     /// 1. isTrivial(root) is true: the node gets the result of its first training example.
     /// 2. No attribute names remain: the node gets the most frequent class of its training set.
     /// 3. Otherwise the node is split in two on the attribute with the largest gain and both
     ///    children are built recursively.
     ///
     /// Side effects in case 3: the chosen attribute is removed from root.getAttrNames(), the
     /// list returned by root.getTrainingSet() is sorted (several times), and two entries are
     /// added to root.getChildren().
     /// </remarks>
     /// <param name="root">Node to expand.</param>
     public static void buildTree(DTNode root) {
         if (isTrivial(root)) {
             root.setResult(root.getTrainingSet()[0].getResult());
             return;
         }

         if (root.getAttrNames().Count == 0) {
             root.setResult(mostFrequentClass(root.getTrainingSet(), root.getClasses()));
             return;
         }

         // Find the attribute with maximum gain. If no attribute scores above 0.0 the first
         // attribute is used together with whatever partition index a fresh GainResult reports.
         GainResult gainResultMax = new GainResult();
         gainResultMax.setGain(0.0);
         int resultIndex = 0;
         for (int i = 0; i < root.getAttrNames().Count; i++) {
             String attrName = root.getAttrNames()[i];
             GainResult candidate = getGain(root.getTrainingSet(), attrName, root.getClasses());
             // getGain stores the lowest partition entropy in the gain field; turn it into an
             // actual gain by subtracting it from getEntropy(..., -1).
             // NOTE(review): index -1 presumably means "no partition" - confirm against getEntropy.
             candidate.setGain(getEntropy(root.getTrainingSet(), root.getClasses(), attrName, -1) -
                     candidate.getGain());
             if (gainResultMax.getGain() < candidate.getGain()) {
                 gainResultMax.setGain(candidate.getGain());
                 gainResultMax.setPartitionIndex(candidate.getPartitionIndex());
                 resultIndex = i;
             }
         }

         String attributeNode = root.getAttrNames()[resultIndex];
         root.getAttrNames().RemoveAt(resultIndex);

         // getGain left the list ordered by the last attribute tried; restore the order of the
         // winning attribute so the partition index is meaningful again.
         root.getTrainingSet().Sort(new ExampleComparator(attributeNode));

         List<Example> sortedExamples = root.getTrainingSet();
         int partitionIndex = gainResultMax.getPartitionIndex();

         // The split threshold is the midpoint between the two examples around the cut; it is
         // the same for both children, so compute it once.
         double leftValue = sortedExamples[partitionIndex].getValue(attributeNode);
         double rightValue = sortedExamples[partitionIndex + 1].getValue(attributeNode);
         double threshold = (leftValue + rightValue) / 2;

         // Lower child: examples [0 .. partitionIndex], condition "attribute LT threshold".
         attachChild(root, copyExamples(sortedExamples, 0, partitionIndex), attributeNode, threshold, true);
         // Upper child: examples [partitionIndex + 1 .. end], condition "attribute GEQT threshold".
         attachChild(root, copyExamples(sortedExamples, partitionIndex + 1, sortedExamples.Count - 1),
                 attributeNode, threshold, false);
     }

     // Returns the class name from classNames that occurs most often as a result in trainingSet.
     // Results that are not listed in classNames are ignored; "" is returned when nothing is counted.
     private static String mostFrequentClass(List<Example> trainingSet, List<String> classNames) {
         Dictionary<String, int> classCount = new Dictionary<String, int>();
         foreach (String className in classNames) {
             classCount.Add(className, 0);
         }

         foreach (Example example in trainingSet) {
             int seen;
             if (classCount.TryGetValue(example.getResult(), out seen)) {
                 classCount[example.getResult()] = seen + 1;
             }
         }

         int maxCount = 0;
         String classResult = "";
         foreach (KeyValuePair<String, int> entry in classCount) {
             // Strict comparison: on a tie the class enumerated first wins.
             if (maxCount < entry.Value) {
                 maxCount = entry.Value;
                 classResult = entry.Key;
             }
         }
         return classResult;
     }

     // Returns fresh Example objects (result plus a new attribute dictionary) for source[first..last], inclusive.
     private static List<Example> copyExamples(List<Example> source, int first, int last) {
         List<Example> copies = new List<Example>();
         for (int p = first; p <= last; p++) {
             Example example = source[p];
             Example newExample = new Example();
             newExample.setResult(example.getResult());

             Dictionary<String, Double> newAttributes = new Dictionary<String, Double>();
             foreach (KeyValuePair<String, Double> entry in example.getAttributes()) {
                 newAttributes.Add(entry.Key, entry.Value);
             }
             newExample.setAttributes(newAttributes);
             copies.Add(newExample);
         }
         return copies;
     }

     // Creates a child of parent holding the given examples, registers it under the split condition
     // (LT for the lower branch, GEQT otherwise) and recursively builds the child's subtree.
     private static void attachChild(DTNode parent, List<Example> examples, String attributeNode,
             double threshold, bool lowerBranch) {
         // The child receives its own copies of the attribute and class name lists.
         DTNode child = new DTNode(new List<Example>(),
                 new List<String>(parent.getAttrNames()),
                 new List<String>(parent.getClasses()));

         Condition condition = new Condition();
         if (lowerBranch) {
             condition.setOperator(Constants.conditionType.LT);
         }
         else {
             condition.setOperator(Constants.conditionType.GEQT);
         }
         condition.setMember1(attributeNode);
         condition.setMember2(threshold);

         child.setTrainingSet(examples);
         parent.getChildren().Add(child, condition);
         buildTree(child);
     }
Exemplo n.º 4
0
 /// <summary>
 /// Scans <paramref name="trainingSet"/>, ordered by <paramref name="attrName"/>, for the
 /// partition index with the lowest entropy as reported by getEntropy.
 /// </summary>
 /// <remarks>
 /// Sorts the caller's list in place using an ExampleComparator built from the attribute name.
 /// An index is scored only when the example at that index and its successor have different
 /// results. The number stored through setGain is the lowest entropy seen (not a gain); it
 /// remains 0xffffff, and no partition index is set, when no index is scored.
 /// </remarks>
 /// <param name="trainingSet">Examples to scan; reordered by this call.</param>
 /// <param name="attrName">Attribute used for ordering and forwarded to getEntropy.</param>
 /// <param name="classNames">Class labels forwarded to getEntropy.</param>
 /// <returns>A new GainResult with the lowest entropy and the index where it occurred.</returns>
 public static GainResult getGain(List<Example> trainingSet, String attrName, List<String> classNames)
 {
     trainingSet.Sort(new ExampleComparator(attrName));

     GainResult outcome = new GainResult();
     double smallest = 0xffffff;
     int index = 0;
     while (index < trainingSet.Count - 1)
     {
         bool resultChanges = !trainingSet[index].getResult().Equals(trainingSet[index + 1].getResult());
         if (resultChanges)
         {
             double entropy = getEntropy(trainingSet, classNames, attrName, index);
             if (entropy < smallest)
             {
                 smallest = entropy;
                 outcome.setPartitionIndex(index);
             }
         }
         index++;
     }

     outcome.setGain(smallest);
     return outcome;
 }