예제 #1
0
        /// <summary>
        /// Builds the tree node for <paramref name="data"/> and, when the rows are not pure,
        /// recursively builds one child per distinct value of the chosen decision variable.
        /// </summary>
        /// <param name="data">Rows as (feature values, target) pairs.</param>
        /// <param name="UsedFeatures">
        /// Feature indexes already used on the path to this node. The chosen index is added while
        /// the children are built and removed again before returning.
        /// </param>
        /// <returns>
        /// A node with <c>TargetClassProbabilities</c> and <c>Entropy</c> set. <c>Children</c> is
        /// only assigned when the chosen feature takes more than one distinct value.
        /// </returns>
        public ID3Node GetID3Node(IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
        {
            // The rows are enumerated several times below. Materialize a deferred query once so
            // that its filters/projections are not re-run on every pass and every recursion level.
            IEnumerable<Tuple<List<String>, String>> rows =
                (data as ICollection<Tuple<List<String>, String>>) ?? data.ToList();

            ID3Node node = new ID3Node();
            node.TargetClassProbabilities = TargetClassProbabilities(rows);
            node.Entropy = Entropy(node.TargetClassProbabilities);

            // Written as a negated '>' so that a NaN entropy also stops here, like the original test.
            if (!(node.Entropy > 0))
            {
                return node;
            }

            node.DecisionVariableIndex = GetDecisionVariable(node, rows, UsedFeatures);
            if (node.DecisionVariableIndex == -1)
            {
                return node;
            }

            int splitIndex = node.DecisionVariableIndex;

            // One pass partitions the rows by feature value. Groups come out in order of first
            // appearance of each value, and rows inside a group keep their source order.
            var partitions = rows.GroupBy(row => row.Item1[splitIndex]).ToList();

            if (partitions.Count > 1)
            {
                node.Children = new Dictionary<string, ID3Node>();

                UsedFeatures.Add(splitIndex);
                try
                {
                    foreach (var partition in partitions)
                    {
                        node.Children[partition.Key] = GetID3Node(partition.ToList(), UsedFeatures);
                    }
                }
                finally
                {
                    // Always hand the caller's list back unchanged, even if a child build throws.
                    UsedFeatures.Remove(splitIndex);
                }
            }

            return node;
        }
예제 #2
0
        /// <summary>
        /// Picks the feature to split on: among the features not listed in
        /// <paramref name="UsedFeatures"/>, the one with the largest <c>InformationGain</c>.
        /// </summary>
        /// <param name="node">Node being split; its <c>Entropy</c> is passed to <c>InformationGain</c>.</param>
        /// <param name="data">Rows as (feature values, target) pairs. The feature count is read from the first row.</param>
        /// <param name="UsedFeatures">Feature indexes that must not be considered.</param>
        /// <returns>
        /// The index of the chosen feature (the lowest index wins a tie), or -1 when
        /// <paramref name="UsedFeatures"/> already holds at least as many entries as there are
        /// features or no gain compares equal to the maximum.
        /// </returns>
        /// <exception cref="InvalidOperationException"><paramref name="data"/> contains no rows.</exception>
        public int GetDecisionVariable(ID3Node node, IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
        {
            int featureCount = data.First().Item1.Count;

            if (UsedFeatures.Count >= featureCount)
            {
                return -1;
            }

            // An ordered list (rather than a Dictionary, whose enumeration order is unspecified)
            // guarantees that ties are broken in favour of the lowest feature index.
            var candidates = new List<KeyValuePair<int, double>>();

            for (int feature = 0; feature < featureCount; feature++)
            {
                if (!UsedFeatures.Contains(feature))
                {
                    double gain = InformationGain(node.Entropy, data, feature);
                    candidates.Add(new KeyValuePair<int, double>(feature, gain));
                }
            }

            double maxGain = candidates.Max(candidate => candidate.Value);

            // Exact comparison is intentional: maxGain is one of the stored values.
            foreach (KeyValuePair<int, double> candidate in candidates)
            {
                if (candidate.Value == maxGain)
                {
                    return candidate.Key;
                }
            }

            return -1;
        }
예제 #3
0
파일: ID3.cs 프로젝트: aaadith/algohub
        /// <summary>
        /// Selects the decision variable for <paramref name="node"/>: the unused feature whose
        /// <c>InformationGain</c> is the highest.
        /// </summary>
        /// <param name="node">Node under construction; its <c>Entropy</c> is forwarded to <c>InformationGain</c>.</param>
        /// <param name="data">Rows as (feature values, target) pairs. The first row determines the number of features.</param>
        /// <param name="UsedFeatures">Indexes of features that are skipped.</param>
        /// <returns>
        /// Index of the selected feature, preferring the lowest index on equal gain; -1 when
        /// <paramref name="UsedFeatures"/> has at least as many entries as there are features or
        /// when no gain compares equal to the maximum.
        /// </returns>
        /// <exception cref="InvalidOperationException"><paramref name="data"/> contains no rows.</exception>
        public int GetDecisionVariable(ID3Node node, IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
        {
            int totalFeatures = data.First().Item1.Count;

            if (UsedFeatures.Count >= totalFeatures)
            {
                return -1;
            }

            // Gains are stored in ascending feature order. A Dictionary does not promise any
            // enumeration order, which would make tie-breaking implementation-dependent.
            var gains = new List<KeyValuePair<int, double>>();

            for (int index = 0; index < totalFeatures; index++)
            {
                if (UsedFeatures.Contains(index))
                {
                    continue;
                }

                gains.Add(new KeyValuePair<int, double>(index, InformationGain(node.Entropy, data, index)));
            }

            double best = gains.Max(entry => entry.Value);

            // 'best' is taken from the list itself, so an exact equality test is sufficient.
            foreach (KeyValuePair<int, double> entry in gains)
            {
                if (entry.Value == best)
                {
                    return entry.Key;
                }
            }

            return -1;
        }
예제 #4
0
파일: ID3.cs 프로젝트: aaadith/algohub
        /// <summary>
        /// Creates the node describing <paramref name="data"/>. When the entropy of the rows is
        /// positive and a decision variable can be chosen, the rows are split by that variable's
        /// values and a child node is built recursively for every value.
        /// </summary>
        /// <param name="data">Rows as (feature values, target) pairs.</param>
        /// <param name="UsedFeatures">
        /// Feature indexes consumed by ancestor nodes. It is modified temporarily while children
        /// are built and restored before the method returns.
        /// </param>
        /// <returns>
        /// The populated node. <c>Children</c> is left unassigned unless the chosen feature has
        /// more than one distinct value in <paramref name="data"/>.
        /// </returns>
        public ID3Node GetID3Node(IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
        {
            // Avoid repeated evaluation of a deferred query: the rows are walked multiple times
            // here and again in every recursive call, so snapshot them unless already a collection.
            IEnumerable<Tuple<List<String>, String>> samples =
                (data as ICollection<Tuple<List<String>, String>>) ?? data.ToList();

            ID3Node node = new ID3Node();

            node.TargetClassProbabilities = TargetClassProbabilities(samples);
            node.Entropy = Entropy(node.TargetClassProbabilities);

            // Negated '>' keeps the original outcome for NaN (treated as "nothing to split").
            if (!(node.Entropy > 0))
            {
                return node;
            }

            node.DecisionVariableIndex = GetDecisionVariable(node, samples, UsedFeatures);

            if (node.DecisionVariableIndex == -1)
            {
                return node;
            }

            int chosenFeature = node.DecisionVariableIndex;

            // GroupBy yields groups in first-seen key order with elements in source order, so the
            // children are created in the same sequence as a Distinct() + Where() per value would.
            var groups = samples.GroupBy(sample => sample.Item1[chosenFeature]).ToList();

            if (groups.Count > 1)
            {
                node.Children = new Dictionary<string, ID3Node>();

                UsedFeatures.Add(chosenFeature);
                try
                {
                    foreach (var group in groups)
                    {
                        node.Children[group.Key] = GetID3Node(group.ToList(), UsedFeatures);
                    }
                }
                finally
                {
                    // Undo the temporary marking even when building a child fails.
                    UsedFeatures.Remove(chosenFeature);
                }
            }

            return node;
        }
예제 #5
0
파일: ID3.cs 프로젝트: aaadith/algohub
        /// <summary>
        /// Reads the training file, converts every line into a (features, target) pair and
        /// stores the tree built from those rows in <c>Root</c>.
        /// </summary>
        /// <param name="TrainingDataFile">Path of the delimited text file to read.</param>
        /// <param name="FieldSeparator">Character used to split each line into columns.</param>
        /// <param name="ContainsHeader">When true, the first line of the file is skipped.</param>
        /// <param name="TargetVariablePositionZeroBased">Zero-based column index of the target value.</param>
        public void Train(String TrainingDataFile, char FieldSeparator, bool ContainsHeader, int TargetVariablePositionZeroBased)
        {
            // ToList() forces the query to run exactly once. Passing the deferred query on would
            // re-split every line and re-run GetFeaturesAfterPruning each time the rows are enumerated.
            List<Tuple<List<String>, String>> data =
                (from line in File.ReadAllLines(TrainingDataFile).Skip(ContainsHeader ? 1 : 0)
                 let columns = line.Split(new[] { FieldSeparator }).ToList()
                 // The target is read before GetFeaturesAfterPruning runs, as in the original query order.
                 let target = columns[TargetVariablePositionZeroBased]
                 let features = GetFeaturesAfterPruning(columns, TargetVariablePositionZeroBased)
                 select new Tuple<List<String>, String>(features, target)).ToList();

            List<int> UsedFeatures = new List<int>();

            Root = GetID3Node(data, UsedFeatures);
        }
예제 #6
0
        /// <summary>
        /// Loads the rows of <paramref name="TrainingDataFile"/> as (features, target) pairs and
        /// assigns the resulting tree to <c>Root</c>.
        /// </summary>
        /// <param name="TrainingDataFile">Path of the delimited text file containing the training rows.</param>
        /// <param name="FieldSeparator">Column delimiter within a line.</param>
        /// <param name="ContainsHeader">Whether the first line is a header that must be skipped.</param>
        /// <param name="TargetVariablePositionZeroBased">Zero-based index of the column holding the target value.</param>
        public void Train(String TrainingDataFile, char FieldSeparator, bool ContainsHeader, int TargetVariablePositionZeroBased)
        {
            int linesToSkip = ContainsHeader ? 1 : 0;
            char[] separators = new[] { FieldSeparator };

            // Materialized up front: the rows are enumerated many times while the tree is built,
            // and a deferred query would repeat the parsing work on every enumeration.
            List<Tuple<List<String>, String>> data =
                (from line in File.ReadAllLines(TrainingDataFile).Skip(linesToSkip)
                 let columns = line.Split(separators).ToList()
                 // Target is captured first, preserving the original evaluation order.
                 let target = columns[TargetVariablePositionZeroBased]
                 let features = GetFeaturesAfterPruning(columns, TargetVariablePositionZeroBased)
                 select new Tuple<List<String>, String>(features, target)).ToList();

            List<int> UsedFeatures = new List<int>();
            Root = GetID3Node(data, UsedFeatures);
        }