예제 #1
0
        /// <summary>
        /// Chooses the axis whose split produces the largest information gain for the given set.
        /// </summary>
        /// <param name="set">The set whose unique features are grouped by axis and evaluated.</param>
        /// <returns>
        /// The key of the axis with the highest strictly positive information gain. When no axis
        /// achieves a positive gain, the key of the first axis group is returned.
        /// </returns>
        public static string SelectBestAxis(DecisionTreeSet set)
        {
            double entropyBeforeSplit = Entropy(set);

            var axisGroups = set.UniqueFeatures()
                                .GroupBy(feature => feature.Axis)
                                .ToList();

            // Start from the first axis; it remains the answer when no axis has a positive gain.
            string winner = axisGroups.First().Key;
            double highestGain = 0.0;

            foreach (var group in axisGroups)
            {
                // Information gain = entropy of the whole set minus the weighted entropy of
                // the branches that splitting on this axis would create.
                double gain = entropyBeforeSplit - EntropyForSplitBranches(set, group.ToList());

                if (gain > highestGain)
                {
                    highestGain = gain;
                    winner = group.Key;
                }
            }

            return winner;
        }
예제 #2
0
 /// <summary>
 /// Computes the weighted entropy of the branches obtained by splitting <paramref name="set"/>
 /// on each of the supplied feature values.
 /// </summary>
 /// <param name="set">The set being split.</param>
 /// <param name="allPossibleAxisValues">The feature values to split on; each yields one subset.</param>
 /// <returns>
 /// The sum, over all subsets, of the subset's share of the instances multiplied by the
 /// subset's entropy.
 /// </returns>
 private static double EntropyForSplitBranches(DecisionTreeSet set, IEnumerable<Feature> allPossibleAxisValues)
 {
     // The weight is computed in double precision: a float ratio would be rounded to about
     // seven significant digits before being multiplied by the double-valued entropy.
     return allPossibleAxisValues
         .Select(possibleValue => set.Split(possibleValue))
         .Select(subset => (double)subset.NumberOfInstances / set.NumberOfInstances * Entropy(subset))
         .Sum();
 }
예제 #3
0
        /// <summary>
        /// Computes the Shannon entropy (base 2) of the output values of the instances in a set.
        /// </summary>
        /// <param name="set">The set whose instances' outputs are examined.</param>
        /// <returns>
        /// The sum of <c>-p * log2(p)</c> over every distinct output value, where <c>p</c> is the
        /// fraction of instances having that value. Returns 0 when the set has no instances.
        /// </returns>
        public static double Entropy(DecisionTreeSet set)
        {
            var total = set.Instances.Count();

            // One group per distinct output value; the group size is that value's frequency.
            var outputGroups = set.Instances.GroupBy(instance => instance.Output.Value).ToList();

            var entropy = 0.0;

            foreach (var group in outputGroups)
            {
                // Use double precision: a float probability would be rounded before it is
                // passed to Math.Log and accumulated into the double result.
                var probability = (double)group.Count() / total;
                entropy -= probability * Math.Log(probability, 2);
            }

            return entropy;
        }
예제 #4
0
        /// <summary>
        /// Reads a file line by line and builds a set containing one parsed instance per line.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <returns>
        /// A new <c>DecisionTreeSet</c> whose instances are the results of <c>ParseLine</c>
        /// for each line, in file order.
        /// </returns>
        public DecisionTreeSet Parse(string file)
        {
            var result = new DecisionTreeSet
            {
                Instances = new List<Instance>()
            };

            using (var reader = new StreamReader(file))
            {
                string currentLine;

                // ReadLine returns null once the end of the stream has been reached.
                while ((currentLine = reader.ReadLine()) != null)
                {
                    result.Instances.Add(ParseLine(currentLine));
                }
            }

            return result;
        }